//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-codegenprepare"

using namespace llvm;

namespace {

class AMDGPUCodeGenPrepare : public FunctionPass,
                             public InstVisitor<AMDGPUCodeGenPrepare, bool> {
  const GCNTargetMachine *TM;
  const SISubtarget *ST;
  DivergenceAnalysis *DA;
  Module *Mod;
  bool HasUnsafeFPMath;

public:
  static char ID;
  AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
    FunctionPass(ID),
    TM(static_cast<const GCNTargetMachine *>(TM)),
    ST(nullptr),
    DA(nullptr),
    Mod(nullptr),
    HasUnsafeFPMath(false) { }

  bool visitFDiv(BinaryOperator &I);

  bool visitInstruction(Instruction &I) {
    return false;
  }

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  const char *getPassName() const override {
    return "AMDGPU IR optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DivergenceAnalysis>();
    AU.setPreservesAll();
  }
};

} // End anonymous namespace

static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
  const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
  if (!CNum)
    return false;

  // Reciprocal f32 is handled separately without denormals.
  return UnsafeDiv && CNum->isExactlyValue(+1.0);
}

// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
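//
// Illustrative sketch (not taken from this file): when the !fpmath metadata
// allows at least 2.5 ULP of error and either unsafe FP math is enabled or
// fp32 denormals are disabled, a division such as
//
//   %r = fdiv float %x, %y, !fpmath !0    ; !0 = !{float 2.500000e+00}
//
// is expected to be rewritten into a call to the target intrinsic:
//
//   %r = call float @llvm.amdgcn.fdiv.fast(float %x, float %y)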
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  // TODO: Handle half
  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
                   FMF.allowReciprocal();
  if (ST->hasFP32Denormals() && !UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
  Function *Decl
    = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is
    // partially constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  // Only report a change if we actually rewrote the fdiv.
  return !!NewFDiv;
}

static bool hasUnsafeFPMath(const Function &F) {
  Attribute Attr = F.getFnAttribute("unsafe-fp-math");
  return Attr.getValueAsString() == "true";
}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
  Mod = &M;
  return false;
}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (!TM || skipFunction(F))
    return false;

  ST = &TM->getSubtarget<SISubtarget>(F);
  DA = &getAnalysis<DivergenceAnalysis>();
  HasUnsafeFPMath = hasUnsafeFPMath(F);

  bool MadeChange = false;

  for (BasicBlock &BB : F) {
    BasicBlock::iterator Next;
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
      Next = std::next(I);
      MadeChange |= visit(*I);
    }
  }

  return MadeChange;
}

INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                         "AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                       "AMDGPU IR optimizations", false, false)

char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) {
  return new AMDGPUCodeGenPrepare(TM);
}
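
// Usage sketch (an assumption, not part of this file): the GCN target is
// expected to schedule this pass in its IR pipeline before instruction
// selection, roughly along the lines of
//
//   // hypothetical placement inside the target's pass configuration
//   addPass(createAMDGPUCodeGenPreparePass(
//       static_cast<const GCNTargetMachine *>(TM)));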