xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
15f757f3fSDimitry Andric //===- AMDGPUImageIntrinsicOptimizer.cpp ----------------------------------===//
25f757f3fSDimitry Andric //
35f757f3fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45f757f3fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55f757f3fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65f757f3fSDimitry Andric //
75f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
85f757f3fSDimitry Andric //
95f757f3fSDimitry Andric // This pass tries to combine multiple image_load intrinsics with dim=2dmsaa
105f757f3fSDimitry Andric // or dim=2darraymsaa into a single image_msaa_load intrinsic if:
115f757f3fSDimitry Andric //
125f757f3fSDimitry Andric // - they refer to the same vaddr except for sample_id,
135f757f3fSDimitry Andric // - they use a constant sample_id and they fall into the same group,
145f757f3fSDimitry Andric // - they have the same dmask and the number of intrinsics and the number of
155f757f3fSDimitry Andric //   vaddr/vdata dword transfers is reduced by the combine.
165f757f3fSDimitry Andric //
175f757f3fSDimitry Andric // Examples for the tradeoff (all are assuming 2DMsaa for vaddr):
185f757f3fSDimitry Andric //
195f757f3fSDimitry Andric // +----------+-----+-----+-------+---------+------------+---------+----------+
205f757f3fSDimitry Andric // | popcount | a16 | d16 | #load | vaddr / | #msaa_load | vaddr / | combine? |
215f757f3fSDimitry Andric // |  (dmask) |     |     |       | vdata   |            | vdata   |          |
225f757f3fSDimitry Andric // +----------+-----+-----+-------+---------+------------+---------+----------+
235f757f3fSDimitry Andric // |        1 |   0 |   0 |     4 |  12 / 4 |          1 |   3 / 4 | yes      |
245f757f3fSDimitry Andric // +----------+-----+-----+-------+---------+------------+---------+----------+
255f757f3fSDimitry Andric // |        1 |   0 |   0 |     2 |   6 / 2 |          1 |   3 / 4 | yes?     |
265f757f3fSDimitry Andric // +----------+-----+-----+-------+---------+------------+---------+----------+
275f757f3fSDimitry Andric // |        2 |   0 |   0 |     4 |  12 / 8 |          2 |   6 / 8 | yes      |
285f757f3fSDimitry Andric // +----------+-----+-----+-------+---------+------------+---------+----------+
295f757f3fSDimitry Andric // |        2 |   0 |   0 |     2 |   6 / 4 |          2 |   6 / 8 | no       |
305f757f3fSDimitry Andric // +----------+-----+-----+-------+---------+------------+---------+----------+
315f757f3fSDimitry Andric // |        1 |   0 |   1 |     2 |   6 / 2 |          1 |   3 / 2 | yes      |
325f757f3fSDimitry Andric // +----------+-----+-----+-------+---------+------------+---------+----------+
335f757f3fSDimitry Andric //
345f757f3fSDimitry Andric // Some cases are of questionable benefit, like the one marked with "yes?"
355f757f3fSDimitry Andric // above: fewer intrinsics and fewer vaddr and fewer total transfers between SP
365f757f3fSDimitry Andric // and TX, but higher vdata. We start by erring on the side of converting these
375f757f3fSDimitry Andric // to MSAA_LOAD.
385f757f3fSDimitry Andric //
395f757f3fSDimitry Andric // clang-format off
405f757f3fSDimitry Andric //
// This pass will combine intrinsics such as (not necessarily consecutive):
425f757f3fSDimitry Andric //  call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
435f757f3fSDimitry Andric //  call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
445f757f3fSDimitry Andric //  call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
455f757f3fSDimitry Andric //  call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
465f757f3fSDimitry Andric // ==>
475f757f3fSDimitry Andric //  call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
485f757f3fSDimitry Andric //
495f757f3fSDimitry Andric // clang-format on
505f757f3fSDimitry Andric //
515f757f3fSDimitry Andric // Future improvements:
525f757f3fSDimitry Andric //
535f757f3fSDimitry Andric // - We may occasionally not want to do the combine if it increases the maximum
545f757f3fSDimitry Andric //   register pressure.
555f757f3fSDimitry Andric //
565f757f3fSDimitry Andric // - Ensure clausing when multiple MSAA_LOAD are generated.
575f757f3fSDimitry Andric //
585f757f3fSDimitry Andric // Note: Even though the image_msaa_load intrinsic already exists on gfx10, this
595f757f3fSDimitry Andric // combine only applies to gfx11, due to a limitation in gfx10: the gfx10
605f757f3fSDimitry Andric // IMAGE_MSAA_LOAD only works correctly with single-channel texture formats, and
615f757f3fSDimitry Andric // we don't know the format at compile time.
625f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
635f757f3fSDimitry Andric 
645f757f3fSDimitry Andric #include "AMDGPU.h"
655f757f3fSDimitry Andric #include "AMDGPUInstrInfo.h"
665f757f3fSDimitry Andric #include "AMDGPUTargetMachine.h"
675f757f3fSDimitry Andric #include "llvm/IR/Function.h"
685f757f3fSDimitry Andric #include "llvm/IR/IRBuilder.h"
695f757f3fSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
705f757f3fSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
715f757f3fSDimitry Andric #include "llvm/Pass.h"
725f757f3fSDimitry Andric #include "llvm/Support/raw_ostream.h"
735f757f3fSDimitry Andric 
745f757f3fSDimitry Andric using namespace llvm;
755f757f3fSDimitry Andric 
765f757f3fSDimitry Andric #define DEBUG_TYPE "amdgpu-image-intrinsic-opt"
775f757f3fSDimitry Andric 
namespace {
/// Legacy pass-manager wrapper for the image intrinsic optimization.
/// The actual transformation lives in imageIntrinsicOptimizerImpl(); this
/// class only carries the TargetMachine pointer needed to query the subtarget.
class AMDGPUImageIntrinsicOptimizer : public FunctionPass {
  const TargetMachine *TM; // May be null; the pass is then a no-op.

public:
  static char ID;

  AMDGPUImageIntrinsicOptimizer(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), TM(TM) {}

  bool runOnFunction(Function &F) override;

}; // End of class AMDGPUImageIntrinsicOptimizer
} // End anonymous namespace
925f757f3fSDimitry Andric 
// Register the legacy pass under the "amdgpu-image-intrinsic-opt" name.
INITIALIZE_PASS(AMDGPUImageIntrinsicOptimizer, DEBUG_TYPE,
                "AMDGPU Image Intrinsic Optimizer", false, false)

// Unique address used by the legacy pass manager to identify this pass.
char AMDGPUImageIntrinsicOptimizer::ID = 0;
975f757f3fSDimitry Andric 
985f757f3fSDimitry Andric void addInstToMergeableList(
995f757f3fSDimitry Andric     IntrinsicInst *II,
1005f757f3fSDimitry Andric     SmallVector<SmallVector<IntrinsicInst *, 4>> &MergeableInsts,
1015f757f3fSDimitry Andric     const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) {
1025f757f3fSDimitry Andric   for (SmallVector<IntrinsicInst *, 4> &IIList : MergeableInsts) {
1035f757f3fSDimitry Andric     // Check Dim.
1045f757f3fSDimitry Andric     if (IIList.front()->getIntrinsicID() != II->getIntrinsicID())
1055f757f3fSDimitry Andric       continue;
1065f757f3fSDimitry Andric 
1075f757f3fSDimitry Andric     // Check D16.
1085f757f3fSDimitry Andric     if (IIList.front()->getType() != II->getType())
1095f757f3fSDimitry Andric       continue;
1105f757f3fSDimitry Andric 
1115f757f3fSDimitry Andric     // Check all arguments (DMask, VAddr, RSrc etc).
1125f757f3fSDimitry Andric     bool AllEqual = true;
1135f757f3fSDimitry Andric     assert(IIList.front()->arg_size() == II->arg_size());
1145f757f3fSDimitry Andric     for (int I = 1, E = II->arg_size(); AllEqual && I != E; ++I) {
1155f757f3fSDimitry Andric       Value *ArgList = IIList.front()->getArgOperand(I);
1165f757f3fSDimitry Andric       Value *Arg = II->getArgOperand(I);
1175f757f3fSDimitry Andric       if (I == ImageDimIntr->VAddrEnd - 1) {
1185f757f3fSDimitry Andric         // Check FragId group.
1195f757f3fSDimitry Andric         auto FragIdList = cast<ConstantInt>(IIList.front()->getArgOperand(I));
1205f757f3fSDimitry Andric         auto FragId = cast<ConstantInt>(II->getArgOperand(I));
1215f757f3fSDimitry Andric         AllEqual = FragIdList->getValue().udiv(4) == FragId->getValue().udiv(4);
1225f757f3fSDimitry Andric       } else {
1235f757f3fSDimitry Andric         // Check all arguments except FragId.
1245f757f3fSDimitry Andric         AllEqual = ArgList == Arg;
1255f757f3fSDimitry Andric       }
1265f757f3fSDimitry Andric     }
1275f757f3fSDimitry Andric     if (!AllEqual)
1285f757f3fSDimitry Andric       continue;
1295f757f3fSDimitry Andric 
1305f757f3fSDimitry Andric     // Add to the list.
1315f757f3fSDimitry Andric     IIList.emplace_back(II);
1325f757f3fSDimitry Andric     return;
1335f757f3fSDimitry Andric   }
1345f757f3fSDimitry Andric 
1355f757f3fSDimitry Andric   // Similar instruction not found, so add a new list.
1365f757f3fSDimitry Andric   MergeableInsts.emplace_back(1, II);
1375f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "New: " << *II << "\n");
1385f757f3fSDimitry Andric }
1395f757f3fSDimitry Andric 
1405f757f3fSDimitry Andric // Collect list of all instructions we know how to merge in a subset of the
1415f757f3fSDimitry Andric // block. It returns an iterator to the instruction after the last one analyzed.
1425f757f3fSDimitry Andric BasicBlock::iterator collectMergeableInsts(
1435f757f3fSDimitry Andric     BasicBlock::iterator I, BasicBlock::iterator E,
1445f757f3fSDimitry Andric     SmallVector<SmallVector<IntrinsicInst *, 4>> &MergeableInsts) {
1455f757f3fSDimitry Andric   for (; I != E; ++I) {
1465f757f3fSDimitry Andric     // Don't combine if there is a store in the middle or if there is a memory
1475f757f3fSDimitry Andric     // barrier.
1485f757f3fSDimitry Andric     if (I->mayHaveSideEffects()) {
1495f757f3fSDimitry Andric       ++I;
1505f757f3fSDimitry Andric       break;
1515f757f3fSDimitry Andric     }
1525f757f3fSDimitry Andric 
1535f757f3fSDimitry Andric     // Ignore non-intrinsics.
1545f757f3fSDimitry Andric     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1555f757f3fSDimitry Andric       Intrinsic::ID IntrinID = II->getIntrinsicID();
1565f757f3fSDimitry Andric 
1575f757f3fSDimitry Andric       // Ignore other intrinsics.
1585f757f3fSDimitry Andric       if (IntrinID != Intrinsic::amdgcn_image_load_2dmsaa &&
1595f757f3fSDimitry Andric           IntrinID != Intrinsic::amdgcn_image_load_2darraymsaa)
1605f757f3fSDimitry Andric         continue;
1615f757f3fSDimitry Andric 
1625f757f3fSDimitry Andric       // Check for constant FragId.
1635f757f3fSDimitry Andric       const auto *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinID);
1645f757f3fSDimitry Andric       const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
1655f757f3fSDimitry Andric       if (!isa<ConstantInt>(II->getArgOperand(FragIdIndex)))
1665f757f3fSDimitry Andric         continue;
1675f757f3fSDimitry Andric 
1685f757f3fSDimitry Andric       LLVM_DEBUG(dbgs() << "Merge: " << *II << "\n");
1695f757f3fSDimitry Andric       addInstToMergeableList(II, MergeableInsts, ImageDimIntr);
1705f757f3fSDimitry Andric     }
1715f757f3fSDimitry Andric   }
1725f757f3fSDimitry Andric 
1735f757f3fSDimitry Andric   return I;
1745f757f3fSDimitry Andric }
1755f757f3fSDimitry Andric 
// Merge each group of compatible image loads in MergeableInsts into one or
// more image_msaa_load intrinsics (one per dmask channel), then rebuild each
// original load's result with extract/insertelement instructions.
// Returns true if any change was made.
bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
  bool Modified = false;

  SmallVector<Instruction *, 4> InstrsToErase;
  for (const auto &IIList : MergeableInsts) {
    // A single load has nothing to merge with.
    if (IIList.size() <= 1)
      continue;

    // Assume the arguments are unchanged and later override them, if needed.
    SmallVector<Value *, 16> Args(IIList.front()->args());

    // Validate function argument and return types, extracting overloaded
    // types along the way.
    SmallVector<Type *, 6> OverloadTys;
    Function *F = IIList.front()->getCalledFunction();
    if (!Intrinsic::getIntrinsicSignature(F, OverloadTys))
      continue;

    Intrinsic::ID IntrinID = IIList.front()->getIntrinsicID();
    const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
        AMDGPU::getImageDimIntrinsicInfo(IntrinID);

    // Each msaa_load returns <4 x EltTy>: one channel for all four samples of
    // the group.
    Type *EltTy = IIList.front()->getType()->getScalarType();
    Type *NewTy = FixedVectorType::get(EltTy, 4);
    OverloadTys[0] = NewTy;
    bool isD16 = EltTy->isHalfTy();

    ConstantInt *DMask = cast<ConstantInt>(
        IIList.front()->getArgOperand(ImageDimIntr->DMaskIndex));
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;
    unsigned NumElts = popcount(DMaskVal);

    // Number of instructions and the number of vaddr/vdata dword transfers
    // should be reduced (see the tradeoff table in the file header).
    unsigned NumLoads = IIList.size();
    unsigned NumMsaas = NumElts;
    unsigned NumVAddrLoads = 3 * NumLoads;
    unsigned NumVDataLoads = divideCeil(NumElts, isD16 ? 2 : 1) * NumLoads;
    unsigned NumVAddrMsaas = 3 * NumMsaas;
    unsigned NumVDataMsaas = divideCeil(4, isD16 ? 2 : 1) * NumMsaas;

    if (NumLoads < NumMsaas ||
        (NumVAddrLoads + NumVDataLoads < NumVAddrMsaas + NumVDataMsaas))
      continue;

    // Round the FragId down to the start of its group of four samples.
    const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
    auto FragId = cast<ConstantInt>(IIList.front()->getArgOperand(FragIdIndex));
    const APInt &NewFragIdVal = FragId->getValue().udiv(4) * 4;

    // Create the new instructions, inserted before the first load of the
    // group.
    IRBuilder<> B(IIList.front());

    // Create one image_msaa_load per set bit in the dmask.
    SmallVector<Instruction *, 4> NewCalls;
    while (DMaskVal != 0) {
      // Isolate the lowest set dmask bit for this call.
      unsigned NewMaskVal = 1 << countr_zero(DMaskVal);

      Intrinsic::ID NewIntrinID;
      if (IntrinID == Intrinsic::amdgcn_image_load_2dmsaa)
        NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2dmsaa;
      else
        NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2darraymsaa;

      Function *NewIntrin = Intrinsic::getDeclaration(
          IIList.front()->getModule(), NewIntrinID, OverloadTys);
      Args[ImageDimIntr->DMaskIndex] =
          ConstantInt::get(DMask->getType(), NewMaskVal);
      Args[FragIdIndex] = ConstantInt::get(FragId->getType(), NewFragIdVal);
      CallInst *NewCall = B.CreateCall(NewIntrin, Args);
      LLVM_DEBUG(dbgs() << "Optimize: " << *NewCall << "\n");

      NewCalls.push_back(NewCall);
      // Clear the bit we just handled.
      DMaskVal -= NewMaskVal;
    }

    // Create the new extractelement instructions.
    for (auto &II : IIList) {
      Value *VecOp = nullptr;
      auto Idx = cast<ConstantInt>(II->getArgOperand(FragIdIndex));
      B.SetCurrentDebugLocation(II->getDebugLoc());
      if (NumElts == 1) {
        // Single channel: the old scalar result is one lane of the new call.
        VecOp = B.CreateExtractElement(NewCalls[0], Idx->getValue().urem(4));
        LLVM_DEBUG(dbgs() << "Add: " << *VecOp << "\n");
      } else {
        // Multiple channels: rebuild the old result vector by taking this
        // sample's lane from each per-channel msaa_load.
        VecOp = UndefValue::get(II->getType());
        for (unsigned I = 0; I < NumElts; ++I) {
          VecOp = B.CreateInsertElement(
              VecOp,
              B.CreateExtractElement(NewCalls[I], Idx->getValue().urem(4)), I);
          LLVM_DEBUG(dbgs() << "Add: " << *VecOp << "\n");
        }
      }

      // Replace the old instruction.
      II->replaceAllUsesWith(VecOp);
      VecOp->takeName(II);
      InstrsToErase.push_back(II);
    }

    Modified = true;
  }

  // Erase the merged loads only after all groups have been processed.
  for (auto I : InstrsToErase)
    I->eraseFromParent();

  return Modified;
}
2835f757f3fSDimitry Andric 
2845f757f3fSDimitry Andric static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {
2855f757f3fSDimitry Andric   if (!TM)
2865f757f3fSDimitry Andric     return false;
2875f757f3fSDimitry Andric 
2885f757f3fSDimitry Andric   // This optimization only applies to GFX11 and beyond.
2895f757f3fSDimitry Andric   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
2905f757f3fSDimitry Andric   if (!AMDGPU::isGFX11Plus(ST) || ST.hasMSAALoadDstSelBug())
2915f757f3fSDimitry Andric     return false;
2925f757f3fSDimitry Andric 
2935f757f3fSDimitry Andric   Module *M = F.getParent();
2945f757f3fSDimitry Andric 
2955f757f3fSDimitry Andric   // Early test to determine if the intrinsics are used.
296*0fca6ea1SDimitry Andric   if (llvm::none_of(*M, [](Function &F) {
2975f757f3fSDimitry Andric         return !F.users().empty() &&
2985f757f3fSDimitry Andric                (F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2dmsaa ||
2995f757f3fSDimitry Andric                 F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2darraymsaa);
3005f757f3fSDimitry Andric       }))
3015f757f3fSDimitry Andric     return false;
3025f757f3fSDimitry Andric 
3035f757f3fSDimitry Andric   bool Modified = false;
3045f757f3fSDimitry Andric   for (auto &BB : F) {
3055f757f3fSDimitry Andric     BasicBlock::iterator SectionEnd;
3065f757f3fSDimitry Andric     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;
3075f757f3fSDimitry Andric          I = SectionEnd) {
3085f757f3fSDimitry Andric       SmallVector<SmallVector<IntrinsicInst *, 4>> MergeableInsts;
3095f757f3fSDimitry Andric 
3105f757f3fSDimitry Andric       SectionEnd = collectMergeableInsts(I, E, MergeableInsts);
3115f757f3fSDimitry Andric       Modified |= optimizeSection(MergeableInsts);
3125f757f3fSDimitry Andric     }
3135f757f3fSDimitry Andric   }
3145f757f3fSDimitry Andric 
3155f757f3fSDimitry Andric   return Modified;
3165f757f3fSDimitry Andric }
3175f757f3fSDimitry Andric 
3185f757f3fSDimitry Andric bool AMDGPUImageIntrinsicOptimizer::runOnFunction(Function &F) {
3195f757f3fSDimitry Andric   if (skipFunction(F))
3205f757f3fSDimitry Andric     return false;
3215f757f3fSDimitry Andric 
3225f757f3fSDimitry Andric   return imageIntrinsicOptimizerImpl(F, TM);
3235f757f3fSDimitry Andric }
3245f757f3fSDimitry Andric 
/// Factory for the legacy pass-manager version of this pass. TM may be null,
/// in which case the pass does nothing.
FunctionPass *
llvm::createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *TM) {
  return new AMDGPUImageIntrinsicOptimizer(TM);
}
3295f757f3fSDimitry Andric 
3305f757f3fSDimitry Andric PreservedAnalyses
3315f757f3fSDimitry Andric AMDGPUImageIntrinsicOptimizerPass::run(Function &F,
3325f757f3fSDimitry Andric                                        FunctionAnalysisManager &AM) {
3335f757f3fSDimitry Andric 
3345f757f3fSDimitry Andric   bool Changed = imageIntrinsicOptimizerImpl(F, &TM);
3355f757f3fSDimitry Andric   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
3365f757f3fSDimitry Andric }
337