//===- AMDGPUImageIntrinsicOptimizer.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass tries to combine multiple image_load intrinsics with dim=2dmsaa
// or dim=2darraymsaa into a single image_msaa_load intrinsic if:
//
// - they refer to the same vaddr except for sample_id,
// - they use a constant sample_id and they fall into the same group,
// - they have the same dmask, and the combine reduces both the number of
//   intrinsics and the number of vaddr/vdata dword transfers.
//
// Examples of the tradeoff (all assume 2DMsaa for vaddr):
//
// +----------+-----+-----+-------+---------+------------+---------+----------+
// | popcount | a16 | d16 | #load | vaddr / | #msaa_load | vaddr / | combine? |
// | (dmask)  |     |     |       | vdata   |            | vdata   |          |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        1 |   0 |   0 |     4 |  12 / 4 |          1 |   3 / 4 | yes      |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        1 |   0 |   0 |     2 |   6 / 2 |          1 |   3 / 4 | yes?     |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        2 |   0 |   0 |     4 |  12 / 8 |          2 |   6 / 8 | yes      |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        2 |   0 |   0 |     2 |   6 / 4 |          2 |   6 / 8 | no       |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        1 |   0 |   1 |     2 |   6 / 2 |          1 |   3 / 2 | yes      |
// +----------+-----+-----+-------+---------+------------+---------+----------+
//
// Some cases are of questionable benefit, like the one marked with "yes?"
// above: fewer intrinsics, fewer vaddr dwords, and fewer total transfers
// between SP and TX, but more vdata dwords. We start by erring on the side of
// converting these to MSAA_LOAD.
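//
// Worked example for the first row: each 2dmsaa load moves 3 vaddr dwords
// (s, t, fragid) and popcount(dmask) = 1 vdata dword, so four loads move
// 12 / 4 dwords; the single msaa_load moves 3 vaddr dwords and 4 vdata dwords
// (all four fragments of the channel), hence 3 / 4.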
//
// clang-format off
//
// This pass will combine intrinsics such as (not necessarily consecutive):
// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
// ==>
// call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
//
// clang-format on
//
// Future improvements:
//
// - We may occasionally not want to do the combine if it increases the maximum
//   register pressure.
//
// - Ensure clausing when multiple MSAA_LOAD are generated.
//
// Note: Even though the image_msaa_load intrinsic already exists on gfx10, this
// combine only applies to gfx11, due to a limitation in gfx10: the gfx10
// IMAGE_MSAA_LOAD only works correctly with single-channel texture formats, and
// we don't know the format at compile time.
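//
// clang-format off
//
// For the multi-channel case the combine emits one single-channel msaa_load
// per set dmask bit and rebuilds each original result with
// extractelement/insertelement. A sketch for dmask=3 (illustrative IR, not
// taken from a test):
// call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
// call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
// call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
// call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
// ==>
// call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
// call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
//
// Each original <2 x float> result is then rebuilt by extracting element
// fragid % 4 from each of the two new calls.
//
// clang-format on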
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-image-intrinsic-opt"

namespace {
class AMDGPUImageIntrinsicOptimizer : public FunctionPass {
  const TargetMachine *TM;

public:
  static char ID;

  AMDGPUImageIntrinsicOptimizer(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), TM(TM) {}

  bool runOnFunction(Function &F) override;

}; // End of class AMDGPUImageIntrinsicOptimizer
} // End anonymous namespace

INITIALIZE_PASS(AMDGPUImageIntrinsicOptimizer, DEBUG_TYPE,
                "AMDGPU Image Intrinsic Optimizer", false, false)

char AMDGPUImageIntrinsicOptimizer::ID = 0;

void addInstToMergeableList(
    IntrinsicInst *II,
    SmallVector<SmallVector<IntrinsicInst *, 4>> &MergeableInsts,
    const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) {
  for (SmallVector<IntrinsicInst *, 4> &IIList : MergeableInsts) {
    // Check Dim.
    if (IIList.front()->getIntrinsicID() != II->getIntrinsicID())
      continue;

    // Check D16.
    if (IIList.front()->getType() != II->getType())
      continue;

    // Check all arguments (DMask, VAddr, RSrc etc).
    bool AllEqual = true;
    assert(IIList.front()->arg_size() == II->arg_size());
    for (int I = 1, E = II->arg_size(); AllEqual && I != E; ++I) {
      Value *ArgList = IIList.front()->getArgOperand(I);
      Value *Arg = II->getArgOperand(I);
      if (I == ImageDimIntr->VAddrEnd - 1) {
        // Check FragId group.
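        // Loads belong to the same group only when their constant sample ids
        // pick the same block of four fragments (0-3, 4-7, ...), because one
        // msaa_load returns four consecutive fragments of a single channel.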
        auto FragIdList = cast<ConstantInt>(IIList.front()->getArgOperand(I));
        auto FragId = cast<ConstantInt>(II->getArgOperand(I));
        AllEqual = FragIdList->getValue().udiv(4) == FragId->getValue().udiv(4);
      } else {
        // Check all arguments except FragId.
        AllEqual = ArgList == Arg;
      }
    }
    if (!AllEqual)
      continue;

    // Add to the list.
    IIList.emplace_back(II);
    return;
  }

  // Similar instruction not found, so add a new list.
  MergeableInsts.emplace_back(1, II);
  LLVM_DEBUG(dbgs() << "New: " << *II << "\n");
}

// Collect a list of all the instructions we know how to merge in a subset of
// the block. Returns an iterator to the instruction after the last one
// analyzed.
BasicBlock::iterator collectMergeableInsts(
    BasicBlock::iterator I, BasicBlock::iterator E,
    SmallVector<SmallVector<IntrinsicInst *, 4>> &MergeableInsts) {
  for (; I != E; ++I) {
    // Don't combine if there is a store in the middle or if there is a memory
    // barrier.
    if (I->mayHaveSideEffects()) {
      ++I;
      break;
    }

    // Ignore non-intrinsics.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      Intrinsic::ID IntrinID = II->getIntrinsicID();

      // Ignore other intrinsics.
      if (IntrinID != Intrinsic::amdgcn_image_load_2dmsaa &&
          IntrinID != Intrinsic::amdgcn_image_load_2darraymsaa)
        continue;

      // Check for constant FragId.
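      // FragId is the last vaddr component; only a literal sample id lets the
      // load be assigned to a fragment group at compile time.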
      const auto *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinID);
      const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
      if (!isa<ConstantInt>(II->getArgOperand(FragIdIndex)))
        continue;

      LLVM_DEBUG(dbgs() << "Merge: " << *II << "\n");
      addInstToMergeableList(II, MergeableInsts, ImageDimIntr);
    }
  }

  return I;
}

bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
  bool Modified = false;

  SmallVector<Instruction *, 4> InstrsToErase;
  for (const auto &IIList : MergeableInsts) {
    if (IIList.size() <= 1)
      continue;

    // Assume the arguments are unchanged and later override them, if needed.
    SmallVector<Value *, 16> Args(IIList.front()->args());

    // Validate function argument and return types, extracting overloaded
    // types along the way.
    SmallVector<Type *, 6> OverloadTys;
    Function *F = IIList.front()->getCalledFunction();
    if (!Intrinsic::getIntrinsicSignature(F, OverloadTys))
      continue;

    Intrinsic::ID IntrinID = IIList.front()->getIntrinsicID();
    const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
        AMDGPU::getImageDimIntrinsicInfo(IntrinID);

    Type *EltTy = IIList.front()->getType()->getScalarType();
    Type *NewTy = FixedVectorType::get(EltTy, 4);
    OverloadTys[0] = NewTy;
    bool isD16 = EltTy->isHalfTy();

    ConstantInt *DMask = cast<ConstantInt>(
        IIList.front()->getArgOperand(ImageDimIntr->DMaskIndex));
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;
    unsigned NumElts = popcount(DMaskVal);

    // Number of instructions and the number of vaddr/vdata dword transfers
    // should be reduced.
    unsigned NumLoads = IIList.size();
    unsigned NumMsaas = NumElts;
    unsigned NumVAddrLoads = 3 * NumLoads;
    unsigned NumVDataLoads = divideCeil(NumElts, isD16 ? 2 : 1) * NumLoads;
    unsigned NumVAddrMsaas = 3 * NumMsaas;
    unsigned NumVDataMsaas = divideCeil(4, isD16 ? 2 : 1) * NumMsaas;
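
    // Ties are allowed (the "yes?" case above): the combine proceeds whenever
    // neither the instruction count nor the total dword traffic increases.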
    if (NumLoads < NumMsaas ||
        (NumVAddrLoads + NumVDataLoads < NumVAddrMsaas + NumVDataMsaas))
      continue;

    const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
    auto FragId = cast<ConstantInt>(IIList.front()->getArgOperand(FragIdIndex));
    const APInt &NewFragIdVal = FragId->getValue().udiv(4) * 4;

    // Create the new instructions.
    IRBuilder<> B(IIList.front());

    // Create the new image_msaa_load intrinsic.
    SmallVector<Instruction *, 4> NewCalls;
    while (DMaskVal != 0) {
      unsigned NewMaskVal = 1 << countr_zero(DMaskVal);

      Intrinsic::ID NewIntrinID;
      if (IntrinID == Intrinsic::amdgcn_image_load_2dmsaa)
        NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2dmsaa;
      else
        NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2darraymsaa;

      Function *NewIntrin = Intrinsic::getDeclaration(
          IIList.front()->getModule(), NewIntrinID, OverloadTys);
      Args[ImageDimIntr->DMaskIndex] =
          ConstantInt::get(DMask->getType(), NewMaskVal);
      Args[FragIdIndex] = ConstantInt::get(FragId->getType(), NewFragIdVal);
      CallInst *NewCall = B.CreateCall(NewIntrin, Args);
      LLVM_DEBUG(dbgs() << "Optimize: " << *NewCall << "\n");

      NewCalls.push_back(NewCall);
      DMaskVal -= NewMaskVal;
    }

    // Create the new extractelement instructions.
    for (auto &II : IIList) {
      Value *VecOp = nullptr;
      auto Idx = cast<ConstantInt>(II->getArgOperand(FragIdIndex));
      B.SetCurrentDebugLocation(II->getDebugLoc());
      if (NumElts == 1) {
        VecOp = B.CreateExtractElement(NewCalls[0], Idx->getValue().urem(4));
        LLVM_DEBUG(dbgs() << "Add: " << *VecOp << "\n");
      } else {
        VecOp = UndefValue::get(II->getType());
        for (unsigned I = 0; I < NumElts; ++I) {
          VecOp = B.CreateInsertElement(
              VecOp,
              B.CreateExtractElement(NewCalls[I], Idx->getValue().urem(4)), I);
          LLVM_DEBUG(dbgs() << "Add: " << *VecOp << "\n");
        }
      }

      // Replace the old instruction.
      II->replaceAllUsesWith(VecOp);
      VecOp->takeName(II);
      InstrsToErase.push_back(II);
    }

    Modified = true;
  }

  for (auto I : InstrsToErase)
    I->eraseFromParent();

  return Modified;
}

static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {
  if (!TM)
    return false;

  // This optimization only applies to GFX11 and beyond.
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  if (!AMDGPU::isGFX11Plus(ST) || ST.hasMSAALoadDstSelBug())
    return false;

  Module *M = F.getParent();

  // Early test to determine if the intrinsics are used.
  if (std::none_of(M->begin(), M->end(), [](Function &F) {
        return !F.users().empty() &&
               (F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2dmsaa ||
                F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2darraymsaa);
      }))
    return false;

  bool Modified = false;
  for (auto &BB : F) {
    BasicBlock::iterator SectionEnd;
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;
         I = SectionEnd) {
      SmallVector<SmallVector<IntrinsicInst *, 4>> MergeableInsts;

      SectionEnd = collectMergeableInsts(I, E, MergeableInsts);
      Modified |= optimizeSection(MergeableInsts);
    }
  }

  return Modified;
}

bool AMDGPUImageIntrinsicOptimizer::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  return imageIntrinsicOptimizerImpl(F, TM);
}

FunctionPass *
llvm::createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *TM) {
  return new AMDGPUImageIntrinsicOptimizer(TM);
}

PreservedAnalyses
AMDGPUImageIntrinsicOptimizerPass::run(Function &F,
                                       FunctionAnalysisManager &AM) {
  bool Changed = imageIntrinsicOptimizerImpl(F, &TM);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}