//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <iterator>

#define DEBUG_TYPE "amdgpu-codegenprepare"

using namespace llvm;

namespace {

static cl::opt<bool> WidenLoads(
  "amdgpu-codegenprepare-widen-constant-loads",
  cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
  cl::ReallyHidden,
  cl::init(true));

class AMDGPUCodeGenPrepare : public FunctionPass,
                             public InstVisitor<AMDGPUCodeGenPrepare, bool> {
  const SISubtarget *ST = nullptr;
  DivergenceAnalysis *DA = nullptr;
  Module *Mod = nullptr;
  bool HasUnsafeFPMath = false;
  AMDGPUAS AMDGPUASI;

  /// \returns \p T's base element bit width.
  unsigned getBaseElementBitWidth(const Type *T) const;

  /// \returns Equivalent 32 bit integer type for given type \p T. For example,
  /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
  /// is returned.
  Type *getI32Ty(IRBuilder<> &B, const Type *T) const;

  /// \returns True if binary operation \p I is a signed binary operation, false
  /// otherwise.
  bool isSigned(const BinaryOperator &I) const;

  /// \returns True if the condition of 'select' operation \p I comes from a
  /// signed 'icmp' operation, false otherwise.
  bool isSigned(const SelectInst &I) const;

  /// \returns True if type \p T needs to be promoted to 32 bit integer type,
  /// false otherwise.
  bool needsPromotionToI32(const Type *T) const;

  /// Promotes uniform binary operation \p I to the equivalent 32 bit binary
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with the equivalent 32 bit binary operation,
  /// and truncating the result of the 32 bit binary operation back to \p I's
  /// original type. Division operations are not promoted.
  ///
  /// \returns True if \p I is promoted to the equivalent 32 bit binary
  /// operation, false otherwise.
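  ///
  /// For example, a uniform 'add i16 %a, %b' is roughly rewritten as follows
  /// (illustrative IR; wrap flags are added only where provable):
  ///   %a.ext = zext i16 %a to i32
  ///   %b.ext = zext i16 %b to i32
  ///   %r.ext = add i32 %a.ext, %b.ext
  ///   %r     = trunc i32 %r.ext to i16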
  bool promoteUniformOpToI32(BinaryOperator &I) const;

  /// Promotes uniform 'icmp' operation \p I to the 32 bit 'icmp' operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, and replacing \p I with the 32 bit 'icmp' operation.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(ICmpInst &I) const;

  /// Promotes uniform 'select' operation \p I to the 32 bit 'select'
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with the 32 bit 'select' operation, and
  /// truncating the result of the 32 bit 'select' operation back to \p I's
  /// original type.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(SelectInst &I) const;

  /// Promotes uniform 'bitreverse' intrinsic \p I to the 32 bit 'bitreverse'
  /// intrinsic.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by zero extending the operand to 32
  /// bits, replacing \p I with the 32 bit 'bitreverse' intrinsic, shifting the
  /// result of the 32 bit 'bitreverse' intrinsic to the right with zero fill
  /// (the shift amount is 32 minus \p I's base element bit width), and
  /// truncating the result of the shift back to \p I's original type.
  ///
  /// \returns True.
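  ///
  /// For example, a uniform 'call i16 @llvm.bitreverse.i16(i16 %x)' is roughly
  /// rewritten as follows (illustrative IR):
  ///   %x.ext   = zext i16 %x to i32
  ///   %rev     = call i32 @llvm.bitreverse.i32(i32 %x.ext)
  ///   %rev.srl = lshr i32 %rev, 16
  ///   %res     = trunc i32 %rev.srl to i16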
  bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;

  /// \returns True if the scalar load \p I is a candidate for widening to
  /// 32 bits.
  ///
  /// \details Uniform, small-type loads from constant memory are widened to a
  /// full 32 bits and then truncated, which allows a scalar load to be used
  /// instead of a vector load.
  bool canWidenScalarExtLoad(LoadInst &I) const;

public:
  static char ID;

  AMDGPUCodeGenPrepare() : FunctionPass(ID) {}

  bool visitFDiv(BinaryOperator &I);

  bool visitInstruction(Instruction &I) { return false; }
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoadInst(LoadInst &I);
  bool visitICmpInst(ICmpInst &I);
  bool visitSelectInst(SelectInst &I);

  bool visitIntrinsicInst(IntrinsicInst &I);
  bool visitBitreverseIntrinsicInst(IntrinsicInst &I);

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DivergenceAnalysis>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return T->getIntegerBitWidth();
  return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
}

Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return B.getInt32Ty();
  return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
}

bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
  return I.getOpcode() == Instruction::AShr ||
      I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
}

bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
  return isa<ICmpInst>(I.getOperand(0)) ?
      cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
}

bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
  const IntegerType *IntTy = dyn_cast<IntegerType>(T);
  if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
    return true;

  if (const VectorType *VT = dyn_cast<VectorType>(T)) {
    // TODO: The set of packed operations is more limited, so may want to
    // promote some anyway.
    if (ST->hasVOP3PInsts())
      return false;

    return needsPromotionToI32(VT->getElementType());
  }

  return false;
}

// Return true if the op promoted to i32 should have nsw set.
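// The operands of the promoted op are extended from a type of at most 16 bits
// (see promoteUniformOpToI32), which is assumed to leave enough headroom in
// 32 bits for the opcodes below to avoid signed wrap.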
static bool promotedOpIsNSW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Sub:
    return true;
  case Instruction::Mul:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}

// Return true if the op promoted to i32 should have nuw set.
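// As above, the narrow source operands are assumed to leave enough headroom
// in 32 bits for the opcodes below to avoid unsigned wrap.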
static bool promotedOpIsNUW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Mul:
    return true;
  case Instruction::Sub:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}

bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  const DataLayout &DL = Mod->getDataLayout();
  int TySize = DL.getTypeSizeInBits(Ty);
  unsigned Align = I.getAlignment() ?
                   I.getAlignment() : DL.getABITypeAlignment(Ty);

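  // Restrict widening to simple, sub-dword, dword-aligned loads with a uniform
  // value, since those are presumably the ones that can use a scalar load for
  // the widened access.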
  return I.isSimple() && TySize < 32 && Align >= 4 && DA->isUniform(&I);
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  if (I.getOpcode() == Instruction::SDiv ||
      I.getOpcode() == Instruction::UDiv)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }

  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
    if (promotedOpIsNSW(cast<Instruction>(I)))
      Inst->setHasNoSignedWrap();

    if (promotedOpIsNUW(cast<Instruction>(I)))
      Inst->setHasNoUnsignedWrap();

    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
      Inst->setIsExact(ExactOp->isExact());
  }

  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
  assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *NewICmp = nullptr;

  if (I.isSigned()) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }
  NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);

  I.replaceAllUsesWith(NewICmp);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp1 = nullptr;
  Value *ExtOp2 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
  } else {
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
  }
  ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Function *I32 =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
  Value *LShrOp =
      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
  Value *TruncRes =
      Builder.CreateTrunc(LShrOp, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
  const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
  if (!CNum)
    return HasDenormals;

  if (UnsafeDiv)
    return true;

  bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0);

  // Reciprocal f32 is handled separately without denormals.
  return HasDenormals ^ IsOne;
}

// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
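  // llvm.amdgcn.fdiv.fast is only assumed to be good to about 2.5 ULP; if the
  // !fpmath metadata demands better accuracy, keep the plain fdiv.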
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() || FMF.allowReciprocal();

  // With UnsafeDiv, the node will be optimized to just rcp and mul.
  if (UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  bool HasDenormals = ST->hasFP32Denormals();
  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is partially
    // constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  return !!NewFDiv;
}

static bool hasUnsafeFPMath(const Function &F) {
  Attribute Attr = F.getFnAttribute("unsafe-fp-math");
  return Attr.getValueAsString() == "true";
}

bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
  bool Changed = false;

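  // Note: only uniform operations are promoted; presumably divergent 16-bit
  // operations can use the 16-bit VALU instructions directly, while uniform
  // values are selected to the scalar unit, which lacks them.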
  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
  if (!WidenLoads)
    return false;

  if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
    Value *BitCast = Builder.CreateBitCast(I.getPointerOperand(), PT);
    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->getValue().isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        Metadata *LowAndHigh[] = {
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
          // Don't make assumptions about the high bits.
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
        };

        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(Mod->getContext(), LowAndHigh));
      }
    }

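    // Truncate the widened value back to the original width, then bitcast it
    // to the original (possibly non-integer) load type.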
    int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
    return true;
  }

  return false;
}

bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::bitreverse:
    return visitBitreverseIntrinsicInst(I);
  default:
    return false;
  }
}

bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformBitreverseToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
  Mod = &M;
  return false;
}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
  ST = &TM.getSubtarget<SISubtarget>(F);
  DA = &getAnalysis<DivergenceAnalysis>();
  HasUnsafeFPMath = hasUnsafeFPMath(F);
  AMDGPUASI = TM.getAMDGPUAS();

  bool MadeChange = false;

  for (BasicBlock &BB : F) {
    BasicBlock::iterator Next;
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
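      // Cache the iterator to the next instruction first; visit() may erase
      // the current one.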
      Next = std::next(I);
      MadeChange |= visit(*I);
    }
  }

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                      "AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
                    false, false)

char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
  return new AMDGPUCodeGenPrepare();
}