//===- AMDGPUImageIntrinsicOptimizer.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass tries to combine multiple image_load intrinsics with dim=2dmsaa
// or dim=2darraymsaa into a single image_msaa_load intrinsic if:
//
// - they refer to the same vaddr except for sample_id,
// - they use a constant sample_id and they fall into the same group,
// - they have the same dmask and the number of intrinsics and the number of
//   vaddr/vdata dword transfers is reduced by the combine.
//
// Examples for the tradeoff (all are assuming 2DMsaa for vaddr):
//
// +----------+-----+-----+-------+---------+------------+---------+----------+
// | popcount | a16 | d16 | #load | vaddr / | #msaa_load | vaddr / | combine? |
// | (dmask)  |     |     |       | vdata   |            | vdata   |          |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        1 |   0 |   0 |     4 |  12 / 4 |          1 |   3 / 4 | yes      |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        1 |   0 |   0 |     2 |   6 / 2 |          1 |   3 / 4 | yes?     |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        2 |   0 |   0 |     4 |  12 / 8 |          2 |   6 / 8 | yes      |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        2 |   0 |   0 |     2 |   6 / 4 |          2 |   6 / 8 | no       |
// +----------+-----+-----+-------+---------+------------+---------+----------+
// |        1 |   0 |   1 |     2 |   6 / 2 |          1 |   3 / 2 | yes      |
// +----------+-----+-----+-------+---------+------------+---------+----------+
//
// Some cases are of questionable benefit, like the one marked with "yes?"
// above: fewer intrinsics and fewer vaddr and fewer total transfers between SP
// and TX, but higher vdata. We start by erring on the side of converting these
// to MSAA_LOAD.
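//
// For example, the first row works out as follows: each image_load sends
// 3 vaddr dwords (s, t, fragid) and receives 1 vdata dword (popcount(dmask)
// = 1, no d16 packing), so 4 loads transfer 12 vaddr / 4 vdata dwords in
// total, while a single image_msaa_load fetches all 4 samples of that
// channel for 3 vaddr / 4 vdata dwords.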
//
// clang-format off
//
// This pass will combine intrinsics such as (not necessarily consecutive):
// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
// call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
// ==>
// call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
//
// clang-format on
//
// Future improvements:
//
// - We may occasionally not want to do the combine if it increases the maximum
//   register pressure.
//
// - Ensure clausing when multiple MSAA_LOAD are generated.
//
// Note: Even though the image_msaa_load intrinsic already exists on gfx10, this
// combine only applies to gfx11, due to a limitation in gfx10: the gfx10
// IMAGE_MSAA_LOAD only works correctly with single-channel texture formats, and
// we don't know the format at compile time.
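//
// clang-format off
//
// When dmask has more than one bit set, one image_msaa_load is emitted per
// channel, and the per-sample results are rebuilt with extractelement and
// insertelement. A sketch for dmask=3 (four loads as above, fragids 0-3):
//
// call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
// ...
// ==>
// call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
// call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
// plus extractelement/insertelement to rebuild each original <2 x float>.
//
// clang-format on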
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-image-intrinsic-opt"

namespace {
class AMDGPUImageIntrinsicOptimizer : public FunctionPass {
  const TargetMachine *TM;

public:
  static char ID;

  AMDGPUImageIntrinsicOptimizer(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), TM(TM) {}

  bool runOnFunction(Function &F) override;

}; // End of class AMDGPUImageIntrinsicOptimizer
} // End anonymous namespace

INITIALIZE_PASS(AMDGPUImageIntrinsicOptimizer, DEBUG_TYPE,
                "AMDGPU Image Intrinsic Optimizer", false, false)

char AMDGPUImageIntrinsicOptimizer::ID = 0;

void addInstToMergeableList(
    IntrinsicInst *II,
    SmallVector<SmallVector<IntrinsicInst *, 4>> &MergeableInsts,
    const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) {
  for (SmallVector<IntrinsicInst *, 4> &IIList : MergeableInsts) {
    // Check Dim.
    if (IIList.front()->getIntrinsicID() != II->getIntrinsicID())
      continue;

    // Check D16.
    if (IIList.front()->getType() != II->getType())
      continue;

    // Check all arguments (DMask, VAddr, RSrc etc).
    bool AllEqual = true;
    assert(IIList.front()->arg_size() == II->arg_size());
    for (int I = 1, E = II->arg_size(); AllEqual && I != E; ++I) {
      Value *ArgList = IIList.front()->getArgOperand(I);
      Value *Arg = II->getArgOperand(I);
      if (I == ImageDimIntr->VAddrEnd - 1) {
        // Check FragId group.
        auto *FragIdList = cast<ConstantInt>(IIList.front()->getArgOperand(I));
        auto *FragId = cast<ConstantInt>(II->getArgOperand(I));
        AllEqual = FragIdList->getValue().udiv(4) == FragId->getValue().udiv(4);
      } else {
        // Check all arguments except FragId.
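        // Operands are SSA values (and constants are uniqued), so pointer
        // equality is sufficient here.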
        AllEqual = ArgList == Arg;
      }
    }
    if (!AllEqual)
      continue;

    // Add to the list.
    IIList.emplace_back(II);
    return;
  }

  // Similar instruction not found, so add a new list.
  MergeableInsts.emplace_back(1, II);
  LLVM_DEBUG(dbgs() << "New: " << *II << "\n");
}

// Collect a list of all the instructions we know how to merge in a subset of
// the block. Returns an iterator to the instruction after the last one
// analyzed.
BasicBlock::iterator collectMergeableInsts(
    BasicBlock::iterator I, BasicBlock::iterator E,
    SmallVector<SmallVector<IntrinsicInst *, 4>> &MergeableInsts) {
  for (; I != E; ++I) {
    // Don't combine if there is a store in the middle or if there is a memory
    // barrier.
    if (I->mayHaveSideEffects()) {
      ++I;
      break;
    }

    // Ignore non-intrinsics.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      Intrinsic::ID IntrinID = II->getIntrinsicID();

      // Ignore other intrinsics.
      if (IntrinID != Intrinsic::amdgcn_image_load_2dmsaa &&
          IntrinID != Intrinsic::amdgcn_image_load_2darraymsaa)
        continue;

      // Check for constant FragId.
      const auto *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinID);
      const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
      if (!isa<ConstantInt>(II->getArgOperand(FragIdIndex)))
        continue;

      LLVM_DEBUG(dbgs() << "Merge: " << *II << "\n");
      addInstToMergeableList(II, MergeableInsts, ImageDimIntr);
    }
  }

  return I;
}

bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
  bool Modified = false;

  SmallVector<Instruction *, 4> InstrsToErase;
  for (const auto &IIList : MergeableInsts) {
    if (IIList.size() <= 1)
      continue;

    // Assume the arguments are unchanged and later override them, if needed.
    SmallVector<Value *, 16> Args(IIList.front()->args());

    // Validate function argument and return types, extracting overloaded
    // types along the way.
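    // OverloadTys[0] is the overloaded return (vdata) type; it is widened to
    // a 4-element vector below so that a single fetch covers all four samples
    // of a fragment-id group.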
    SmallVector<Type *, 6> OverloadTys;
    Function *F = IIList.front()->getCalledFunction();
    if (!Intrinsic::getIntrinsicSignature(F, OverloadTys))
      continue;

    Intrinsic::ID IntrinID = IIList.front()->getIntrinsicID();
    const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
        AMDGPU::getImageDimIntrinsicInfo(IntrinID);

    Type *EltTy = IIList.front()->getType()->getScalarType();
    Type *NewTy = FixedVectorType::get(EltTy, 4);
    OverloadTys[0] = NewTy;
    bool isD16 = EltTy->isHalfTy();

    ConstantInt *DMask = cast<ConstantInt>(
        IIList.front()->getArgOperand(ImageDimIntr->DMaskIndex));
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;
    unsigned NumElts = popcount(DMaskVal);

    // Number of instructions and the number of vaddr/vdata dword transfers
    // should be reduced.
    unsigned NumLoads = IIList.size();
    unsigned NumMsaas = NumElts;
    unsigned NumVAddrLoads = 3 * NumLoads;
    unsigned NumVDataLoads = divideCeil(NumElts, isD16 ? 2 : 1) * NumLoads;
    unsigned NumVAddrMsaas = 3 * NumMsaas;
    unsigned NumVDataMsaas = divideCeil(4, isD16 ? 2 : 1) * NumMsaas;

    if (NumLoads < NumMsaas ||
        (NumVAddrLoads + NumVDataLoads < NumVAddrMsaas + NumVDataMsaas))
      continue;

    const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
    auto *FragId = cast<ConstantInt>(IIList.front()->getArgOperand(FragIdIndex));
    const APInt &NewFragIdVal = FragId->getValue().udiv(4) * 4;

    // Create the new instructions.
    IRBuilder<> B(IIList.front());

    // Create the new image_msaa_load intrinsic.
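    // Each image_msaa_load returns all four samples of one channel, so emit
    // one call per bit set in the dmask, peeling bits from lowest to highest;
    // NewCalls ends up ordered by channel.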
    SmallVector<Instruction *, 4> NewCalls;
    while (DMaskVal != 0) {
      unsigned NewMaskVal = 1 << countr_zero(DMaskVal);

      Intrinsic::ID NewIntrinID;
      if (IntrinID == Intrinsic::amdgcn_image_load_2dmsaa)
        NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2dmsaa;
      else
        NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2darraymsaa;

      Function *NewIntrin = Intrinsic::getDeclaration(
          IIList.front()->getModule(), NewIntrinID, OverloadTys);
      Args[ImageDimIntr->DMaskIndex] =
          ConstantInt::get(DMask->getType(), NewMaskVal);
      Args[FragIdIndex] = ConstantInt::get(FragId->getType(), NewFragIdVal);
      CallInst *NewCall = B.CreateCall(NewIntrin, Args);
      LLVM_DEBUG(dbgs() << "Optimize: " << *NewCall << "\n");

      NewCalls.push_back(NewCall);
      DMaskVal -= NewMaskVal;
    }

    // Create the new extractelement instructions.
    for (auto &II : IIList) {
      Value *VecOp = nullptr;
      auto *Idx = cast<ConstantInt>(II->getArgOperand(FragIdIndex));
      B.SetCurrentDebugLocation(II->getDebugLoc());
      if (NumElts == 1) {
        VecOp = B.CreateExtractElement(NewCalls[0], Idx->getValue().urem(4));
        LLVM_DEBUG(dbgs() << "Add: " << *VecOp << "\n");
      } else {
        VecOp = UndefValue::get(II->getType());
        for (unsigned I = 0; I < NumElts; ++I) {
          VecOp = B.CreateInsertElement(
              VecOp,
              B.CreateExtractElement(NewCalls[I], Idx->getValue().urem(4)), I);
          LLVM_DEBUG(dbgs() << "Add: " << *VecOp << "\n");
        }
      }

      // Replace the old instruction.
      II->replaceAllUsesWith(VecOp);
      VecOp->takeName(II);
      InstrsToErase.push_back(II);
    }

    Modified = true;
  }

  for (auto *I : InstrsToErase)
    I->eraseFromParent();

  return Modified;
}

static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {
  if (!TM)
    return false;

  // This optimization only applies to GFX11 and beyond.
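  // It is also skipped on subtargets where MSAA_LOAD has a known dst_sel bug
  // (hasMSAALoadDstSelBug).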
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  if (!AMDGPU::isGFX11Plus(ST) || ST.hasMSAALoadDstSelBug())
    return false;

  Module *M = F.getParent();

  // Early test to determine if the intrinsics are used.
  if (llvm::none_of(*M, [](Function &F) {
        return !F.users().empty() &&
               (F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2dmsaa ||
                F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2darraymsaa);
      }))
    return false;

  bool Modified = false;
  for (auto &BB : F) {
    BasicBlock::iterator SectionEnd;
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;
         I = SectionEnd) {
      SmallVector<SmallVector<IntrinsicInst *, 4>> MergeableInsts;

      SectionEnd = collectMergeableInsts(I, E, MergeableInsts);
      Modified |= optimizeSection(MergeableInsts);
    }
  }

  return Modified;
}

bool AMDGPUImageIntrinsicOptimizer::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  return imageIntrinsicOptimizerImpl(F, TM);
}

FunctionPass *
llvm::createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *TM) {
  return new AMDGPUImageIntrinsicOptimizer(TM);
}

PreservedAnalyses
AMDGPUImageIntrinsicOptimizerPass::run(Function &F,
                                       FunctionAnalysisManager &AM) {
  bool Changed = imageIntrinsicOptimizerImpl(F, &TM);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}