//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include <cassert>
#include <iterator>

#define DEBUG_TYPE "amdgpu-codegenprepare"

using namespace llvm;

namespace {

static cl::opt<bool> WidenLoads(
  "amdgpu-codegenprepare-widen-constant-loads",
  cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
  cl::ReallyHidden,
  cl::init(true));

class AMDGPUCodeGenPrepare : public FunctionPass,
                             public InstVisitor<AMDGPUCodeGenPrepare, bool> {
  const SISubtarget *ST = nullptr;
  DivergenceAnalysis *DA = nullptr;
  Module *Mod = nullptr;
  bool HasUnsafeFPMath = false;
  AMDGPUAS AMDGPUASI;

  /// \returns \p T's base element bit width.
  unsigned getBaseElementBitWidth(const Type *T) const;

  /// \returns Equivalent 32 bit integer type for given type \p T. For example,
  /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
  /// is returned.
  Type *getI32Ty(IRBuilder<> &B, const Type *T) const;

  /// \returns True if binary operation \p I is a signed binary operation,
  /// false otherwise.
  bool isSigned(const BinaryOperator &I) const;

  /// \returns True if the condition of 'select' operation \p I comes from a
  /// signed 'icmp' operation, false otherwise.
  bool isSigned(const SelectInst &I) const;

  /// \returns True if type \p T needs to be promoted to 32 bit integer type,
  /// false otherwise.
  bool needsPromotionToI32(const Type *T) const;

  /// Promotes uniform binary operation \p I to equivalent 32 bit binary
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with equivalent 32 bit binary operation, and
  /// truncating the result of 32 bit binary operation back to \p I's original
  /// type. Division operation is not promoted.
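  ///
  /// For example (illustrative only), a uniform
  ///   %r = add i16 %a, %b
  /// is rewritten roughly as
  ///   %a.ext = zext i16 %a to i32
  ///   %b.ext = zext i16 %b to i32
  ///   %r.ext = add nuw nsw i32 %a.ext, %b.ext
  ///   %r = trunc i32 %r.ext to i16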
  ///
  /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
  /// false otherwise.
  bool promoteUniformOpToI32(BinaryOperator &I) const;

  /// Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, and replacing \p I with 32 bit 'icmp' operation.
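  ///
  /// For example (illustrative only), a uniform
  ///   %c = icmp slt i16 %a, %b
  /// is rewritten roughly as
  ///   %a.ext = sext i16 %a to i32
  ///   %b.ext = sext i16 %b to i32
  ///   %c = icmp slt i32 %a.ext, %b.ext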
  ///
  /// \returns True.
  bool promoteUniformOpToI32(ICmpInst &I) const;

  /// Promotes uniform 'select' operation \p I to 32 bit 'select'
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with 32 bit 'select' operation, and truncating
  /// the result of 32 bit 'select' operation back to \p I's original type.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(SelectInst &I) const;

  /// Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse'
  /// intrinsic.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by zero extending the operand to
  /// 32 bits, replacing \p I with 32 bit 'bitreverse' intrinsic, shifting the
  /// result of 32 bit 'bitreverse' intrinsic to the right with zero fill (the
  /// shift amount is 32 minus \p I's base element bit width), and truncating
  /// the result of the shift operation back to \p I's original type.
  ///
  /// \returns True.
  bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;

  /// Check if a scalar load can be widened.
  ///
  /// \details Uniform, sub-dword loads from constant memory can be widened to
  /// a full 32 bits and then truncated back, allowing a scalar load to be
  /// selected instead of a vector load.
  ///
  /// \returns True if the load \p I can be widened.
  bool canWidenScalarExtLoad(LoadInst &I) const;

public:
  static char ID;

  AMDGPUCodeGenPrepare() : FunctionPass(ID) {}

  bool visitFDiv(BinaryOperator &I);

  bool visitInstruction(Instruction &I) { return false; }
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoadInst(LoadInst &I);
  bool visitICmpInst(ICmpInst &I);
  bool visitSelectInst(SelectInst &I);

  bool visitIntrinsicInst(IntrinsicInst &I);
  bool visitBitreverseIntrinsicInst(IntrinsicInst &I);

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DivergenceAnalysis>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return T->getIntegerBitWidth();
  return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
}

Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return B.getInt32Ty();
  return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
}

bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
  return I.getOpcode() == Instruction::AShr ||
      I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
}

bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
  return isa<ICmpInst>(I.getOperand(0)) ?
      cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
}

bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
  const IntegerType *IntTy = dyn_cast<IntegerType>(T);
  if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
    return true;

  if (const VectorType *VT = dyn_cast<VectorType>(T)) {
    // TODO: The set of packed operations is more limited, so may want to
    // promote some anyway.
    if (ST->hasVOP3PInsts())
      return false;

    return needsPromotionToI32(VT->getElementType());
  }

  return false;
}
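
// The two helpers below decide which wrap flags a promoted operation may
// carry. For example (informal reasoning): the operands of a promoted i16 add
// were extended from at most 16 bits, so the 32 bit result cannot wrap in
// either sense and the add may carry both nsw and nuw. A promoted sub of zero
// extended operands can go negative, so it keeps nuw only if the original
// operation had it; similarly, a promoted mul can exceed 2^31, so it is nsw
// only if the original mul was nuw.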

// Return true if the op promoted to i32 should have nsw set.
static bool promotedOpIsNSW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Sub:
    return true;
  case Instruction::Mul:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}

// Return true if the op promoted to i32 should have nuw set.
static bool promotedOpIsNUW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Mul:
    return true;
  case Instruction::Sub:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}

bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  const DataLayout &DL = Mod->getDataLayout();
  int TySize = DL.getTypeSizeInBits(Ty);
  unsigned Align = I.getAlignment() ?
                   I.getAlignment() : DL.getABITypeAlignment(Ty);

  return I.isSimple() && TySize < 32 && Align >= 4 && DA->isUniform(&I);
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  if (I.getOpcode() == Instruction::SDiv ||
      I.getOpcode() == Instruction::UDiv)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }

  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
    if (promotedOpIsNSW(cast<Instruction>(I)))
      Inst->setHasNoSignedWrap();

    if (promotedOpIsNUW(cast<Instruction>(I)))
      Inst->setHasNoUnsignedWrap();

    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
      Inst->setIsExact(ExactOp->isExact());
  }

  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
  assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *NewICmp = nullptr;

  if (I.isSigned()) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }
  NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);

  I.replaceAllUsesWith(NewICmp);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp1 = nullptr;
  Value *ExtOp2 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
  } else {
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
  }
  ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}
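
// For example (illustrative only), a uniform
//   %r = call i16 @llvm.bitreverse.i16(i16 %x)
// is rewritten roughly as
//   %x.ext = zext i16 %x to i32
//   %rev = call i32 @llvm.bitreverse.i32(i32 %x.ext)
//   %shr = lshr i32 %rev, 16    ; 32 minus the original bit width
//   %r = trunc i32 %shr to i16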
bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Function *I32 =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
  Value *LShrOp =
      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
  Value *TruncRes =
      Builder.CreateTrunc(LShrOp, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
  const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
  if (!CNum)
    return HasDenormals;

  if (UnsafeDiv)
    return true;

  bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0);

  // Reciprocal f32 is handled separately without denormals.
  return HasDenormals ^ IsOne;
}

// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
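//
// For example (illustrative only), an
//   %d = fdiv float %x, %y, !fpmath !0    ; !0 = !{float 2.500000e+00}
// that allows at least 2.5 ULP of error may be replaced by a call to
// llvm.amdgcn.fdiv.fast, while an fdiv without !fpmath metadata, or with a
// tighter accuracy bound, is left for the generic lowering.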
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() ||
                   FMF.allowReciprocal();

  // With UnsafeDiv, the node will be optimized to just rcp and mul.
  if (UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  bool HasDenormals = ST->hasFP32Denormals();
  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is
    // partially constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  return !!NewFDiv;
}

static bool hasUnsafeFPMath(const Function &F) {
  Attribute Attr = F.getFnAttribute("unsafe-fp-math");
  return Attr.getValueAsString() == "true";
}

bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}
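
// For example (illustrative only), a uniform sub-dword load from constant
// memory such as
//   %v = load i8, i8 addrspace(4)* %p, align 4
// is rewritten roughly as
//   %cast = bitcast i8 addrspace(4)* %p to i32 addrspace(4)*
//   %wide = load i32, i32 addrspace(4)* %cast, align 4
//   %v = trunc i32 %wide to i8
// so that a scalar load can be selected for it.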
bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
  if (!WidenLoads)
    return false;

  if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
    Value *BitCast = Builder.CreateBitCast(I.getPointerOperand(), PT);
    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->getValue().isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        Metadata *LowAndHigh[] = {
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
          // Don't make assumptions about the high bits.
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
        };

        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(Mod->getContext(), LowAndHigh));
      }
    }

    int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
    return true;
  }

  return false;
}

bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::bitreverse:
    return visitBitreverseIntrinsicInst(I);
  default:
    return false;
  }
}

bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformBitreverseToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
  Mod = &M;
  return false;
}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
  ST = &TM.getSubtarget<SISubtarget>(F);
  DA = &getAnalysis<DivergenceAnalysis>();
  HasUnsafeFPMath = hasUnsafeFPMath(F);
  AMDGPUASI = TM.getAMDGPUAS();

  bool MadeChange = false;

  for (BasicBlock &BB : F) {
    BasicBlock::iterator Next;
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
      Next = std::next(I);
      MadeChange |= visit(*I);
    }
  }

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                      "AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
                    false, false)

char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
  return new AMDGPUCodeGenPrepare();
}