xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1e8d8bef9SDimitry Andric //===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric // \file
10e8d8bef9SDimitry Andric // This file implements a TargetTransformInfo analysis pass specific to the
11e8d8bef9SDimitry Andric // AMDGPU target machine. It uses the target's detailed information to provide
12e8d8bef9SDimitry Andric // more precise answers to certain TTI queries, while letting the target
13e8d8bef9SDimitry Andric // independent and default TTI implementations handle the rest.
14e8d8bef9SDimitry Andric //
15e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
16e8d8bef9SDimitry Andric 
17e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h"
18e8d8bef9SDimitry Andric #include "AMDGPUTargetTransformInfo.h"
19e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
20bdd1243dSDimitry Andric #include "llvm/ADT/FloatingPointMode.h"
21e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
22e8d8bef9SDimitry Andric #include "llvm/Transforms/InstCombine/InstCombiner.h"
23bdd1243dSDimitry Andric #include <optional>
24e8d8bef9SDimitry Andric 
25e8d8bef9SDimitry Andric using namespace llvm;
2606c3fb27SDimitry Andric using namespace llvm::PatternMatch;
27e8d8bef9SDimitry Andric 
28e8d8bef9SDimitry Andric #define DEBUG_TYPE "AMDGPUtti"
29e8d8bef9SDimitry Andric 
30e8d8bef9SDimitry Andric namespace {
31e8d8bef9SDimitry Andric 
32e8d8bef9SDimitry Andric struct AMDGPUImageDMaskIntrinsic {
33e8d8bef9SDimitry Andric   unsigned Intr;
34e8d8bef9SDimitry Andric };
35e8d8bef9SDimitry Andric 
36e8d8bef9SDimitry Andric #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
37e8d8bef9SDimitry Andric #include "InstCombineTables.inc"
38e8d8bef9SDimitry Andric 
39e8d8bef9SDimitry Andric } // end anonymous namespace
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
42e8d8bef9SDimitry Andric //
43e8d8bef9SDimitry Andric // A single NaN input is folded to minnum, so we rely on that folding for
44e8d8bef9SDimitry Andric // handling NaNs.
45e8d8bef9SDimitry Andric static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
46e8d8bef9SDimitry Andric                            const APFloat &Src2) {
47e8d8bef9SDimitry Andric   APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
48e8d8bef9SDimitry Andric 
49e8d8bef9SDimitry Andric   APFloat::cmpResult Cmp0 = Max3.compare(Src0);
50e8d8bef9SDimitry Andric   assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
51e8d8bef9SDimitry Andric   if (Cmp0 == APFloat::cmpEqual)
52e8d8bef9SDimitry Andric     return maxnum(Src1, Src2);
53e8d8bef9SDimitry Andric 
54e8d8bef9SDimitry Andric   APFloat::cmpResult Cmp1 = Max3.compare(Src1);
55e8d8bef9SDimitry Andric   assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
56e8d8bef9SDimitry Andric   if (Cmp1 == APFloat::cmpEqual)
57e8d8bef9SDimitry Andric     return maxnum(Src0, Src2);
58e8d8bef9SDimitry Andric 
59e8d8bef9SDimitry Andric   return maxnum(Src0, Src1);
60e8d8bef9SDimitry Andric }
61e8d8bef9SDimitry Andric 
62e8d8bef9SDimitry Andric // Check if a value can be converted to a 16-bit value without losing
63e8d8bef9SDimitry Andric // precision.
6404eeddc0SDimitry Andric // The value is expected to be either a float (IsFloat = true) or an unsigned
6504eeddc0SDimitry Andric // integer (IsFloat = false).
6604eeddc0SDimitry Andric static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
67e8d8bef9SDimitry Andric   Type *VTy = V.getType();
68e8d8bef9SDimitry Andric   if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
69e8d8bef9SDimitry Andric     // The value is already 16-bit, so we don't want to convert to 16-bit again!
70e8d8bef9SDimitry Andric     return false;
71e8d8bef9SDimitry Andric   }
7204eeddc0SDimitry Andric   if (IsFloat) {
73e8d8bef9SDimitry Andric     if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
7404eeddc0SDimitry Andric       // We need to check that if we cast the index down to a half, we do not
7504eeddc0SDimitry Andric       // lose precision.
76e8d8bef9SDimitry Andric       APFloat FloatValue(ConstFloat->getValueAPF());
77e8d8bef9SDimitry Andric       bool LosesInfo = true;
7804eeddc0SDimitry Andric       FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
7904eeddc0SDimitry Andric                          &LosesInfo);
80e8d8bef9SDimitry Andric       return !LosesInfo;
81e8d8bef9SDimitry Andric     }
8204eeddc0SDimitry Andric   } else {
8304eeddc0SDimitry Andric     if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
8404eeddc0SDimitry Andric       // We need to check that if we cast the index down to an i16, we do not
8504eeddc0SDimitry Andric       // lose precision.
8604eeddc0SDimitry Andric       APInt IntValue(ConstInt->getValue());
8704eeddc0SDimitry Andric       return IntValue.getActiveBits() <= 16;
8804eeddc0SDimitry Andric     }
8904eeddc0SDimitry Andric   }
9004eeddc0SDimitry Andric 
91e8d8bef9SDimitry Andric   Value *CastSrc;
9204eeddc0SDimitry Andric   bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
9304eeddc0SDimitry Andric                        : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
9404eeddc0SDimitry Andric   if (IsExt) {
95e8d8bef9SDimitry Andric     Type *CastSrcTy = CastSrc->getType();
96e8d8bef9SDimitry Andric     if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
97e8d8bef9SDimitry Andric       return true;
98e8d8bef9SDimitry Andric   }
99e8d8bef9SDimitry Andric 
100e8d8bef9SDimitry Andric   return false;
101e8d8bef9SDimitry Andric }
102e8d8bef9SDimitry Andric 
103e8d8bef9SDimitry Andric // Convert a value to 16-bit.
104e8d8bef9SDimitry Andric static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
105e8d8bef9SDimitry Andric   Type *VTy = V.getType();
106e8d8bef9SDimitry Andric   if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
107e8d8bef9SDimitry Andric     return cast<Instruction>(&V)->getOperand(0);
108e8d8bef9SDimitry Andric   if (VTy->isIntegerTy())
109e8d8bef9SDimitry Andric     return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
110e8d8bef9SDimitry Andric   if (VTy->isFloatingPointTy())
111e8d8bef9SDimitry Andric     return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
112e8d8bef9SDimitry Andric 
113e8d8bef9SDimitry Andric   llvm_unreachable("Should never be called!");
114e8d8bef9SDimitry Andric }
115e8d8bef9SDimitry Andric 
11681ad6265SDimitry Andric /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
11781ad6265SDimitry Andric /// modified arguments (based on OldIntr) and replaces InstToReplace with
11881ad6265SDimitry Andric /// this newly created intrinsic call.
119bdd1243dSDimitry Andric static std::optional<Instruction *> modifyIntrinsicCall(
12081ad6265SDimitry Andric     IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
12181ad6265SDimitry Andric     InstCombiner &IC,
12204eeddc0SDimitry Andric     std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
12304eeddc0SDimitry Andric         Func) {
12404eeddc0SDimitry Andric   SmallVector<Type *, 4> ArgTys;
12581ad6265SDimitry Andric   if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
126bdd1243dSDimitry Andric     return std::nullopt;
12704eeddc0SDimitry Andric 
12881ad6265SDimitry Andric   SmallVector<Value *, 8> Args(OldIntr.args());
12904eeddc0SDimitry Andric 
13004eeddc0SDimitry Andric   // Modify arguments and types
13104eeddc0SDimitry Andric   Func(Args, ArgTys);
13204eeddc0SDimitry Andric 
13381ad6265SDimitry Andric   Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
13404eeddc0SDimitry Andric 
13504eeddc0SDimitry Andric   CallInst *NewCall = IC.Builder.CreateCall(I, Args);
13681ad6265SDimitry Andric   NewCall->takeName(&OldIntr);
13781ad6265SDimitry Andric   NewCall->copyMetadata(OldIntr);
13804eeddc0SDimitry Andric   if (isa<FPMathOperator>(NewCall))
13981ad6265SDimitry Andric     NewCall->copyFastMathFlags(&OldIntr);
14004eeddc0SDimitry Andric 
14104eeddc0SDimitry Andric   // Erase and replace uses
14281ad6265SDimitry Andric   if (!InstToReplace.getType()->isVoidTy())
14381ad6265SDimitry Andric     IC.replaceInstUsesWith(InstToReplace, NewCall);
14481ad6265SDimitry Andric 
14581ad6265SDimitry Andric   bool RemoveOldIntr = &OldIntr != &InstToReplace;
14681ad6265SDimitry Andric 
14781ad6265SDimitry Andric   auto RetValue = IC.eraseInstFromFunction(InstToReplace);
14881ad6265SDimitry Andric   if (RemoveOldIntr)
14981ad6265SDimitry Andric     IC.eraseInstFromFunction(OldIntr);
15081ad6265SDimitry Andric 
15181ad6265SDimitry Andric   return RetValue;
15204eeddc0SDimitry Andric }
15304eeddc0SDimitry Andric 
154bdd1243dSDimitry Andric static std::optional<Instruction *>
155e8d8bef9SDimitry Andric simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
156e8d8bef9SDimitry Andric                              const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
157e8d8bef9SDimitry Andric                              IntrinsicInst &II, InstCombiner &IC) {
15804eeddc0SDimitry Andric   // Optimize _L to _LZ when _L is zero
15904eeddc0SDimitry Andric   if (const auto *LZMappingInfo =
16004eeddc0SDimitry Andric           AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
16104eeddc0SDimitry Andric     if (auto *ConstantLod =
16204eeddc0SDimitry Andric             dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
16304eeddc0SDimitry Andric       if (ConstantLod->isZero() || ConstantLod->isNegative()) {
16404eeddc0SDimitry Andric         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
16504eeddc0SDimitry Andric             AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
16604eeddc0SDimitry Andric                                                      ImageDimIntr->Dim);
16704eeddc0SDimitry Andric         return modifyIntrinsicCall(
16881ad6265SDimitry Andric             II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
16904eeddc0SDimitry Andric               Args.erase(Args.begin() + ImageDimIntr->LodIndex);
17004eeddc0SDimitry Andric             });
17104eeddc0SDimitry Andric       }
17204eeddc0SDimitry Andric     }
17304eeddc0SDimitry Andric   }
17404eeddc0SDimitry Andric 
17504eeddc0SDimitry Andric   // Optimize _mip away, when 'lod' is zero
17604eeddc0SDimitry Andric   if (const auto *MIPMappingInfo =
17704eeddc0SDimitry Andric           AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
17804eeddc0SDimitry Andric     if (auto *ConstantMip =
17904eeddc0SDimitry Andric             dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
18004eeddc0SDimitry Andric       if (ConstantMip->isZero()) {
18104eeddc0SDimitry Andric         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
18204eeddc0SDimitry Andric             AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
18304eeddc0SDimitry Andric                                                      ImageDimIntr->Dim);
18404eeddc0SDimitry Andric         return modifyIntrinsicCall(
18581ad6265SDimitry Andric             II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
18604eeddc0SDimitry Andric               Args.erase(Args.begin() + ImageDimIntr->MipIndex);
18704eeddc0SDimitry Andric             });
18804eeddc0SDimitry Andric       }
18904eeddc0SDimitry Andric     }
19004eeddc0SDimitry Andric   }
19104eeddc0SDimitry Andric 
19204eeddc0SDimitry Andric   // Optimize _bias away when 'bias' is zero
19304eeddc0SDimitry Andric   if (const auto *BiasMappingInfo =
19404eeddc0SDimitry Andric           AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
19504eeddc0SDimitry Andric     if (auto *ConstantBias =
19604eeddc0SDimitry Andric             dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
19704eeddc0SDimitry Andric       if (ConstantBias->isZero()) {
19804eeddc0SDimitry Andric         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
19904eeddc0SDimitry Andric             AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
20004eeddc0SDimitry Andric                                                      ImageDimIntr->Dim);
20104eeddc0SDimitry Andric         return modifyIntrinsicCall(
20281ad6265SDimitry Andric             II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
20304eeddc0SDimitry Andric               Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
20404eeddc0SDimitry Andric               ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
20504eeddc0SDimitry Andric             });
20604eeddc0SDimitry Andric       }
20704eeddc0SDimitry Andric     }
20804eeddc0SDimitry Andric   }
20904eeddc0SDimitry Andric 
21004eeddc0SDimitry Andric   // Optimize _offset away when 'offset' is zero
21104eeddc0SDimitry Andric   if (const auto *OffsetMappingInfo =
21204eeddc0SDimitry Andric           AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
21304eeddc0SDimitry Andric     if (auto *ConstantOffset =
21404eeddc0SDimitry Andric             dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
21504eeddc0SDimitry Andric       if (ConstantOffset->isZero()) {
21604eeddc0SDimitry Andric         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
21704eeddc0SDimitry Andric             AMDGPU::getImageDimIntrinsicByBaseOpcode(
21804eeddc0SDimitry Andric                 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
21904eeddc0SDimitry Andric         return modifyIntrinsicCall(
22081ad6265SDimitry Andric             II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
22104eeddc0SDimitry Andric               Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
22204eeddc0SDimitry Andric             });
22304eeddc0SDimitry Andric       }
22404eeddc0SDimitry Andric     }
22504eeddc0SDimitry Andric   }
22604eeddc0SDimitry Andric 
22781ad6265SDimitry Andric   // Try to use D16
22881ad6265SDimitry Andric   if (ST->hasD16Images()) {
22981ad6265SDimitry Andric 
23081ad6265SDimitry Andric     const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
23181ad6265SDimitry Andric         AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
23281ad6265SDimitry Andric 
23381ad6265SDimitry Andric     if (BaseOpcode->HasD16) {
23481ad6265SDimitry Andric 
23581ad6265SDimitry Andric       // If the only use of image intrinsic is a fptrunc (with conversion to
23681ad6265SDimitry Andric       // half) then both fptrunc and image intrinsic will be replaced with image
23781ad6265SDimitry Andric       // intrinsic with D16 flag.
23881ad6265SDimitry Andric       if (II.hasOneUse()) {
23981ad6265SDimitry Andric         Instruction *User = II.user_back();
24081ad6265SDimitry Andric 
24181ad6265SDimitry Andric         if (User->getOpcode() == Instruction::FPTrunc &&
24281ad6265SDimitry Andric             User->getType()->getScalarType()->isHalfTy()) {
24381ad6265SDimitry Andric 
24481ad6265SDimitry Andric           return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
24581ad6265SDimitry Andric                                      [&](auto &Args, auto &ArgTys) {
24681ad6265SDimitry Andric                                        // Change return type of image intrinsic.
24781ad6265SDimitry Andric                                        // Set it to return type of fptrunc.
24881ad6265SDimitry Andric                                        ArgTys[0] = User->getType();
24981ad6265SDimitry Andric                                      });
25081ad6265SDimitry Andric         }
25181ad6265SDimitry Andric       }
25281ad6265SDimitry Andric     }
25381ad6265SDimitry Andric   }
25481ad6265SDimitry Andric 
25504eeddc0SDimitry Andric   // Try to use A16 or G16
256e8d8bef9SDimitry Andric   if (!ST->hasA16() && !ST->hasG16())
257bdd1243dSDimitry Andric     return std::nullopt;
258e8d8bef9SDimitry Andric 
25904eeddc0SDimitry Andric   // Address is interpreted as float if the instruction has a sampler or as
26004eeddc0SDimitry Andric   // unsigned int if there is no sampler.
26104eeddc0SDimitry Andric   bool HasSampler =
26204eeddc0SDimitry Andric       AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
263e8d8bef9SDimitry Andric   bool FloatCoord = false;
264e8d8bef9SDimitry Andric   // true means derivatives can be converted to 16 bit, coordinates not
265e8d8bef9SDimitry Andric   bool OnlyDerivatives = false;
266e8d8bef9SDimitry Andric 
267e8d8bef9SDimitry Andric   for (unsigned OperandIndex = ImageDimIntr->GradientStart;
268e8d8bef9SDimitry Andric        OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
269e8d8bef9SDimitry Andric     Value *Coord = II.getOperand(OperandIndex);
270e8d8bef9SDimitry Andric     // If the values are not derived from 16-bit values, we cannot optimize.
27104eeddc0SDimitry Andric     if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
272e8d8bef9SDimitry Andric       if (OperandIndex < ImageDimIntr->CoordStart ||
273e8d8bef9SDimitry Andric           ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
274bdd1243dSDimitry Andric         return std::nullopt;
275e8d8bef9SDimitry Andric       }
276e8d8bef9SDimitry Andric       // All gradients can be converted, so convert only them
277e8d8bef9SDimitry Andric       OnlyDerivatives = true;
278e8d8bef9SDimitry Andric       break;
279e8d8bef9SDimitry Andric     }
280e8d8bef9SDimitry Andric 
281e8d8bef9SDimitry Andric     assert(OperandIndex == ImageDimIntr->GradientStart ||
282e8d8bef9SDimitry Andric            FloatCoord == Coord->getType()->isFloatingPointTy());
283e8d8bef9SDimitry Andric     FloatCoord = Coord->getType()->isFloatingPointTy();
284e8d8bef9SDimitry Andric   }
285e8d8bef9SDimitry Andric 
28604eeddc0SDimitry Andric   if (!OnlyDerivatives && !ST->hasA16())
287e8d8bef9SDimitry Andric     OnlyDerivatives = true; // Only supports G16
28804eeddc0SDimitry Andric 
28904eeddc0SDimitry Andric   // Check if there is a bias parameter and if it can be converted to f16
29004eeddc0SDimitry Andric   if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
29104eeddc0SDimitry Andric     Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
29204eeddc0SDimitry Andric     assert(HasSampler &&
29304eeddc0SDimitry Andric            "Only image instructions with a sampler can have a bias");
29404eeddc0SDimitry Andric     if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
29504eeddc0SDimitry Andric       OnlyDerivatives = true;
296e8d8bef9SDimitry Andric   }
297e8d8bef9SDimitry Andric 
29804eeddc0SDimitry Andric   if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
29904eeddc0SDimitry Andric                                                ImageDimIntr->CoordStart))
300bdd1243dSDimitry Andric     return std::nullopt;
30104eeddc0SDimitry Andric 
302e8d8bef9SDimitry Andric   Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
303e8d8bef9SDimitry Andric                                : Type::getInt16Ty(II.getContext());
304e8d8bef9SDimitry Andric 
30504eeddc0SDimitry Andric   return modifyIntrinsicCall(
30681ad6265SDimitry Andric       II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
307e8d8bef9SDimitry Andric         ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
30804eeddc0SDimitry Andric         if (!OnlyDerivatives) {
309e8d8bef9SDimitry Andric           ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
310e8d8bef9SDimitry Andric 
31104eeddc0SDimitry Andric           // Change the bias type
31204eeddc0SDimitry Andric           if (ImageDimIntr->NumBiasArgs != 0)
31304eeddc0SDimitry Andric             ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
31404eeddc0SDimitry Andric         }
315e8d8bef9SDimitry Andric 
316e8d8bef9SDimitry Andric         unsigned EndIndex =
317e8d8bef9SDimitry Andric             OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
318e8d8bef9SDimitry Andric         for (unsigned OperandIndex = ImageDimIntr->GradientStart;
319e8d8bef9SDimitry Andric              OperandIndex < EndIndex; OperandIndex++) {
320e8d8bef9SDimitry Andric           Args[OperandIndex] =
321e8d8bef9SDimitry Andric               convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
322e8d8bef9SDimitry Andric         }
323e8d8bef9SDimitry Andric 
32404eeddc0SDimitry Andric         // Convert the bias
32504eeddc0SDimitry Andric         if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
32604eeddc0SDimitry Andric           Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
32704eeddc0SDimitry Andric           Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
32804eeddc0SDimitry Andric         }
32904eeddc0SDimitry Andric       });
330e8d8bef9SDimitry Andric }
331e8d8bef9SDimitry Andric 
33206c3fb27SDimitry Andric bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Instruction &I,
33306c3fb27SDimitry Andric                                            const Value *Op0, const Value *Op1,
334e8d8bef9SDimitry Andric                                            InstCombiner &IC) const {
335e8d8bef9SDimitry Andric   // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
336e8d8bef9SDimitry Andric   // infinity, gives +0.0. If we can prove we don't have one of the special
337e8d8bef9SDimitry Andric   // cases then we can use a normal multiply instead.
338e8d8bef9SDimitry Andric   // TODO: Create and use isKnownFiniteNonZero instead of just matching
339e8d8bef9SDimitry Andric   // constants here.
340e8d8bef9SDimitry Andric   if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
341e8d8bef9SDimitry Andric       match(Op1, PatternMatch::m_FiniteNonZero())) {
342e8d8bef9SDimitry Andric     // One operand is not zero or infinity or NaN.
343e8d8bef9SDimitry Andric     return true;
344e8d8bef9SDimitry Andric   }
34506c3fb27SDimitry Andric 
346*0fca6ea1SDimitry Andric   SimplifyQuery SQ = IC.getSimplifyQuery().getWithInstruction(&I);
347*0fca6ea1SDimitry Andric   if (isKnownNeverInfOrNaN(Op0, /*Depth=*/0, SQ) &&
348*0fca6ea1SDimitry Andric       isKnownNeverInfOrNaN(Op1, /*Depth=*/0, SQ)) {
349e8d8bef9SDimitry Andric     // Neither operand is infinity or NaN.
350e8d8bef9SDimitry Andric     return true;
351e8d8bef9SDimitry Andric   }
352e8d8bef9SDimitry Andric   return false;
353e8d8bef9SDimitry Andric }
354e8d8bef9SDimitry Andric 
35506c3fb27SDimitry Andric /// Match an fpext from half to float, or a constant we can convert.
35606c3fb27SDimitry Andric static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc) {
35706c3fb27SDimitry Andric   if (match(Arg, m_OneUse(m_FPExt(m_Value(FPExtSrc)))))
35806c3fb27SDimitry Andric     return FPExtSrc->getType()->isHalfTy();
35906c3fb27SDimitry Andric 
36006c3fb27SDimitry Andric   ConstantFP *CFP;
36106c3fb27SDimitry Andric   if (match(Arg, m_ConstantFP(CFP))) {
36206c3fb27SDimitry Andric     bool LosesInfo;
36306c3fb27SDimitry Andric     APFloat Val(CFP->getValueAPF());
36406c3fb27SDimitry Andric     Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
36506c3fb27SDimitry Andric     if (LosesInfo)
36606c3fb27SDimitry Andric       return false;
36706c3fb27SDimitry Andric 
36806c3fb27SDimitry Andric     FPExtSrc = ConstantFP::get(Type::getHalfTy(Arg->getContext()), Val);
36906c3fb27SDimitry Andric     return true;
37006c3fb27SDimitry Andric   }
37106c3fb27SDimitry Andric 
37206c3fb27SDimitry Andric   return false;
37306c3fb27SDimitry Andric }
37406c3fb27SDimitry Andric 
37506c3fb27SDimitry Andric // Trim all zero components from the end of the vector \p UseV and return
37606c3fb27SDimitry Andric // an appropriate bitset with known elements.
37706c3fb27SDimitry Andric static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
37806c3fb27SDimitry Andric                                        Instruction *I) {
37906c3fb27SDimitry Andric   auto *VTy = cast<FixedVectorType>(UseV->getType());
38006c3fb27SDimitry Andric   unsigned VWidth = VTy->getNumElements();
38106c3fb27SDimitry Andric   APInt DemandedElts = APInt::getAllOnes(VWidth);
38206c3fb27SDimitry Andric 
38306c3fb27SDimitry Andric   for (int i = VWidth - 1; i > 0; --i) {
38406c3fb27SDimitry Andric     auto *Elt = findScalarElement(UseV, i);
38506c3fb27SDimitry Andric     if (!Elt)
38606c3fb27SDimitry Andric       break;
38706c3fb27SDimitry Andric 
38806c3fb27SDimitry Andric     if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
38906c3fb27SDimitry Andric       if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
39006c3fb27SDimitry Andric         break;
39106c3fb27SDimitry Andric     } else {
39206c3fb27SDimitry Andric       break;
39306c3fb27SDimitry Andric     }
39406c3fb27SDimitry Andric 
39506c3fb27SDimitry Andric     DemandedElts.clearBit(i);
39606c3fb27SDimitry Andric   }
39706c3fb27SDimitry Andric 
39806c3fb27SDimitry Andric   return DemandedElts;
39906c3fb27SDimitry Andric }
40006c3fb27SDimitry Andric 
4017a6dacacSDimitry Andric // Trim elements of the end of the vector \p V, if they are
4027a6dacacSDimitry Andric // equal to the first element of the vector.
4037a6dacacSDimitry Andric static APInt defaultComponentBroadcast(Value *V) {
4047a6dacacSDimitry Andric   auto *VTy = cast<FixedVectorType>(V->getType());
4057a6dacacSDimitry Andric   unsigned VWidth = VTy->getNumElements();
4067a6dacacSDimitry Andric   APInt DemandedElts = APInt::getAllOnes(VWidth);
4077a6dacacSDimitry Andric   Value *FirstComponent = findScalarElement(V, 0);
4087a6dacacSDimitry Andric 
4097a6dacacSDimitry Andric   SmallVector<int> ShuffleMask;
4107a6dacacSDimitry Andric   if (auto *SVI = dyn_cast<ShuffleVectorInst>(V))
4117a6dacacSDimitry Andric     SVI->getShuffleMask(ShuffleMask);
4127a6dacacSDimitry Andric 
4137a6dacacSDimitry Andric   for (int I = VWidth - 1; I > 0; --I) {
4147a6dacacSDimitry Andric     if (ShuffleMask.empty()) {
4157a6dacacSDimitry Andric       auto *Elt = findScalarElement(V, I);
4167a6dacacSDimitry Andric       if (!Elt || (Elt != FirstComponent && !isa<UndefValue>(Elt)))
4177a6dacacSDimitry Andric         break;
4187a6dacacSDimitry Andric     } else {
4197a6dacacSDimitry Andric       // Detect identical elements in the shufflevector result, even though
4207a6dacacSDimitry Andric       // findScalarElement cannot tell us what that element is.
4217a6dacacSDimitry Andric       if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
4227a6dacacSDimitry Andric         break;
4237a6dacacSDimitry Andric     }
4247a6dacacSDimitry Andric     DemandedElts.clearBit(I);
4257a6dacacSDimitry Andric   }
4267a6dacacSDimitry Andric 
4277a6dacacSDimitry Andric   return DemandedElts;
4287a6dacacSDimitry Andric }
4297a6dacacSDimitry Andric 
43006c3fb27SDimitry Andric static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
43106c3fb27SDimitry Andric                                                     IntrinsicInst &II,
43206c3fb27SDimitry Andric                                                     APInt DemandedElts,
43306c3fb27SDimitry Andric                                                     int DMaskIdx = -1,
43406c3fb27SDimitry Andric                                                     bool IsLoad = true);
43506c3fb27SDimitry Andric 
4365f757f3fSDimitry Andric /// Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
4375f757f3fSDimitry Andric static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp) {
4385f757f3fSDimitry Andric   return (SqrtOp->getType()->isFloatTy() &&
4395f757f3fSDimitry Andric           (SqrtOp->hasApproxFunc() || SqrtOp->getFPAccuracy() >= 1.0f)) ||
4405f757f3fSDimitry Andric          SqrtOp->getType()->isHalfTy();
4415f757f3fSDimitry Andric }
4425f757f3fSDimitry Andric 
443bdd1243dSDimitry Andric std::optional<Instruction *>
444e8d8bef9SDimitry Andric GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
445e8d8bef9SDimitry Andric   Intrinsic::ID IID = II.getIntrinsicID();
446e8d8bef9SDimitry Andric   switch (IID) {
447e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_rcp: {
448e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
449e8d8bef9SDimitry Andric 
450e8d8bef9SDimitry Andric     // TODO: Move to ConstantFolding/InstSimplify?
451e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src)) {
452e8d8bef9SDimitry Andric       Type *Ty = II.getType();
453e8d8bef9SDimitry Andric       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
454e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, QNaN);
455e8d8bef9SDimitry Andric     }
456e8d8bef9SDimitry Andric 
457e8d8bef9SDimitry Andric     if (II.isStrictFP())
458e8d8bef9SDimitry Andric       break;
459e8d8bef9SDimitry Andric 
460e8d8bef9SDimitry Andric     if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
461e8d8bef9SDimitry Andric       const APFloat &ArgVal = C->getValueAPF();
462e8d8bef9SDimitry Andric       APFloat Val(ArgVal.getSemantics(), 1);
463e8d8bef9SDimitry Andric       Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
464e8d8bef9SDimitry Andric 
465e8d8bef9SDimitry Andric       // This is more precise than the instruction may give.
466e8d8bef9SDimitry Andric       //
467e8d8bef9SDimitry Andric       // TODO: The instruction always flushes denormal results (except for f16),
468e8d8bef9SDimitry Andric       // should this also?
469e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
470e8d8bef9SDimitry Andric     }
471e8d8bef9SDimitry Andric 
4725f757f3fSDimitry Andric     FastMathFlags FMF = cast<FPMathOperator>(II).getFastMathFlags();
4735f757f3fSDimitry Andric     if (!FMF.allowContract())
4745f757f3fSDimitry Andric       break;
4755f757f3fSDimitry Andric     auto *SrcCI = dyn_cast<IntrinsicInst>(Src);
4765f757f3fSDimitry Andric     if (!SrcCI)
4775f757f3fSDimitry Andric       break;
4785f757f3fSDimitry Andric 
4795f757f3fSDimitry Andric     auto IID = SrcCI->getIntrinsicID();
4805f757f3fSDimitry Andric     // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable
4815f757f3fSDimitry Andric     //
4825f757f3fSDimitry Andric     // llvm.amdgcn.rcp(llvm.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable and
4835f757f3fSDimitry Andric     // relaxed.
4845f757f3fSDimitry Andric     if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
4855f757f3fSDimitry Andric       const FPMathOperator *SqrtOp = cast<FPMathOperator>(SrcCI);
4865f757f3fSDimitry Andric       FastMathFlags InnerFMF = SqrtOp->getFastMathFlags();
4875f757f3fSDimitry Andric       if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
4885f757f3fSDimitry Andric         break;
4895f757f3fSDimitry Andric 
4905f757f3fSDimitry Andric       if (IID == Intrinsic::sqrt && !canContractSqrtToRsq(SqrtOp))
4915f757f3fSDimitry Andric         break;
4925f757f3fSDimitry Andric 
4935f757f3fSDimitry Andric       Function *NewDecl = Intrinsic::getDeclaration(
4945f757f3fSDimitry Andric           SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
4955f757f3fSDimitry Andric 
4965f757f3fSDimitry Andric       InnerFMF |= FMF;
4975f757f3fSDimitry Andric       II.setFastMathFlags(InnerFMF);
4985f757f3fSDimitry Andric 
4995f757f3fSDimitry Andric       II.setCalledFunction(NewDecl);
5005f757f3fSDimitry Andric       return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0));
5015f757f3fSDimitry Andric     }
5025f757f3fSDimitry Andric 
503e8d8bef9SDimitry Andric     break;
504e8d8bef9SDimitry Andric   }
505bdd1243dSDimitry Andric   case Intrinsic::amdgcn_sqrt:
506e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_rsq: {
507e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
508e8d8bef9SDimitry Andric 
509e8d8bef9SDimitry Andric     // TODO: Move to ConstantFolding/InstSimplify?
510e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src)) {
511e8d8bef9SDimitry Andric       Type *Ty = II.getType();
512e8d8bef9SDimitry Andric       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
513e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, QNaN);
514e8d8bef9SDimitry Andric     }
515e8d8bef9SDimitry Andric 
5165f757f3fSDimitry Andric     // f16 amdgcn.sqrt is identical to regular sqrt.
5175f757f3fSDimitry Andric     if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
5185f757f3fSDimitry Andric       Function *NewDecl = Intrinsic::getDeclaration(
5195f757f3fSDimitry Andric           II.getModule(), Intrinsic::sqrt, {II.getType()});
5205f757f3fSDimitry Andric       II.setCalledFunction(NewDecl);
5215f757f3fSDimitry Andric       return &II;
5225f757f3fSDimitry Andric     }
5235f757f3fSDimitry Andric 
524e8d8bef9SDimitry Andric     break;
525e8d8bef9SDimitry Andric   }
52606c3fb27SDimitry Andric   case Intrinsic::amdgcn_log:
52706c3fb27SDimitry Andric   case Intrinsic::amdgcn_exp2: {
52806c3fb27SDimitry Andric     const bool IsLog = IID == Intrinsic::amdgcn_log;
52906c3fb27SDimitry Andric     const bool IsExp = IID == Intrinsic::amdgcn_exp2;
53006c3fb27SDimitry Andric     Value *Src = II.getArgOperand(0);
53106c3fb27SDimitry Andric     Type *Ty = II.getType();
53206c3fb27SDimitry Andric 
53306c3fb27SDimitry Andric     if (isa<PoisonValue>(Src))
53406c3fb27SDimitry Andric       return IC.replaceInstUsesWith(II, Src);
53506c3fb27SDimitry Andric 
53606c3fb27SDimitry Andric     if (IC.getSimplifyQuery().isUndefValue(Src))
53706c3fb27SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
53806c3fb27SDimitry Andric 
53906c3fb27SDimitry Andric     if (ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
54006c3fb27SDimitry Andric       if (C->isInfinity()) {
54106c3fb27SDimitry Andric         // exp2(+inf) -> +inf
54206c3fb27SDimitry Andric         // log2(+inf) -> +inf
54306c3fb27SDimitry Andric         if (!C->isNegative())
54406c3fb27SDimitry Andric           return IC.replaceInstUsesWith(II, C);
54506c3fb27SDimitry Andric 
54606c3fb27SDimitry Andric         // exp2(-inf) -> 0
54706c3fb27SDimitry Andric         if (IsExp && C->isNegative())
54806c3fb27SDimitry Andric           return IC.replaceInstUsesWith(II, ConstantFP::getZero(Ty));
54906c3fb27SDimitry Andric       }
55006c3fb27SDimitry Andric 
55106c3fb27SDimitry Andric       if (II.isStrictFP())
55206c3fb27SDimitry Andric         break;
55306c3fb27SDimitry Andric 
55406c3fb27SDimitry Andric       if (C->isNaN()) {
55506c3fb27SDimitry Andric         Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
55606c3fb27SDimitry Andric         return IC.replaceInstUsesWith(II, Quieted);
55706c3fb27SDimitry Andric       }
55806c3fb27SDimitry Andric 
55906c3fb27SDimitry Andric       // f32 instruction doesn't handle denormals, f16 does.
56006c3fb27SDimitry Andric       if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
56106c3fb27SDimitry Andric         Constant *FoldedValue = IsLog ? ConstantFP::getInfinity(Ty, true)
56206c3fb27SDimitry Andric                                       : ConstantFP::get(Ty, 1.0);
56306c3fb27SDimitry Andric         return IC.replaceInstUsesWith(II, FoldedValue);
56406c3fb27SDimitry Andric       }
56506c3fb27SDimitry Andric 
56606c3fb27SDimitry Andric       if (IsLog && C->isNegative())
56706c3fb27SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
56806c3fb27SDimitry Andric 
56906c3fb27SDimitry Andric       // TODO: Full constant folding matching hardware behavior.
57006c3fb27SDimitry Andric     }
57106c3fb27SDimitry Andric 
57206c3fb27SDimitry Andric     break;
57306c3fb27SDimitry Andric   }
574e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_frexp_mant:
575e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_frexp_exp: {
576e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
577e8d8bef9SDimitry Andric     if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
578e8d8bef9SDimitry Andric       int Exp;
579e8d8bef9SDimitry Andric       APFloat Significand =
580e8d8bef9SDimitry Andric           frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
581e8d8bef9SDimitry Andric 
582e8d8bef9SDimitry Andric       if (IID == Intrinsic::amdgcn_frexp_mant) {
583e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(
584e8d8bef9SDimitry Andric             II, ConstantFP::get(II.getContext(), Significand));
585e8d8bef9SDimitry Andric       }
586e8d8bef9SDimitry Andric 
587e8d8bef9SDimitry Andric       // Match instruction special case behavior.
588e8d8bef9SDimitry Andric       if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
589e8d8bef9SDimitry Andric         Exp = 0;
590e8d8bef9SDimitry Andric 
591e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
592e8d8bef9SDimitry Andric     }
593e8d8bef9SDimitry Andric 
594e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src)) {
595e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
596e8d8bef9SDimitry Andric     }
597e8d8bef9SDimitry Andric 
598e8d8bef9SDimitry Andric     break;
599e8d8bef9SDimitry Andric   }
600e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_class: {
601e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
602e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
603e8d8bef9SDimitry Andric     const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
60406c3fb27SDimitry Andric     if (CMask) {
60506c3fb27SDimitry Andric       II.setCalledOperand(Intrinsic::getDeclaration(
60606c3fb27SDimitry Andric           II.getModule(), Intrinsic::is_fpclass, Src0->getType()));
60706c3fb27SDimitry Andric 
60806c3fb27SDimitry Andric       // Clamp any excess bits, as they're illegal for the generic intrinsic.
60906c3fb27SDimitry Andric       II.setArgOperand(1, ConstantInt::get(Src1->getType(),
61006c3fb27SDimitry Andric                                            CMask->getZExtValue() & fcAllFlags));
61106c3fb27SDimitry Andric       return &II;
612e8d8bef9SDimitry Andric     }
613e8d8bef9SDimitry Andric 
61406c3fb27SDimitry Andric     // Propagate poison.
61506c3fb27SDimitry Andric     if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1))
61606c3fb27SDimitry Andric       return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
617e8d8bef9SDimitry Andric 
61806c3fb27SDimitry Andric     // llvm.amdgcn.class(_, undef) -> false
61906c3fb27SDimitry Andric     if (IC.getSimplifyQuery().isUndefValue(Src1))
620e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
62106c3fb27SDimitry Andric 
62206c3fb27SDimitry Andric     // llvm.amdgcn.class(undef, mask) -> mask != 0
62306c3fb27SDimitry Andric     if (IC.getSimplifyQuery().isUndefValue(Src0)) {
62406c3fb27SDimitry Andric       Value *CmpMask = IC.Builder.CreateICmpNE(
62506c3fb27SDimitry Andric           Src1, ConstantInt::getNullValue(Src1->getType()));
62606c3fb27SDimitry Andric       return IC.replaceInstUsesWith(II, CmpMask);
627e8d8bef9SDimitry Andric     }
628e8d8bef9SDimitry Andric     break;
629e8d8bef9SDimitry Andric   }
630e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pkrtz: {
631e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
632e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
633e8d8bef9SDimitry Andric     if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
634e8d8bef9SDimitry Andric       if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
635e8d8bef9SDimitry Andric         const fltSemantics &HalfSem =
636e8d8bef9SDimitry Andric             II.getType()->getScalarType()->getFltSemantics();
637e8d8bef9SDimitry Andric         bool LosesInfo;
638e8d8bef9SDimitry Andric         APFloat Val0 = C0->getValueAPF();
639e8d8bef9SDimitry Andric         APFloat Val1 = C1->getValueAPF();
640e8d8bef9SDimitry Andric         Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
641e8d8bef9SDimitry Andric         Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
642e8d8bef9SDimitry Andric 
643e8d8bef9SDimitry Andric         Constant *Folded =
644e8d8bef9SDimitry Andric             ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
645e8d8bef9SDimitry Andric                                  ConstantFP::get(II.getContext(), Val1)});
646e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Folded);
647e8d8bef9SDimitry Andric       }
648e8d8bef9SDimitry Andric     }
649e8d8bef9SDimitry Andric 
650e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
651e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
652e8d8bef9SDimitry Andric     }
653e8d8bef9SDimitry Andric 
654e8d8bef9SDimitry Andric     break;
655e8d8bef9SDimitry Andric   }
656e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pknorm_i16:
657e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pknorm_u16:
658e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pk_i16:
659e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pk_u16: {
660e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
661e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
662e8d8bef9SDimitry Andric 
663e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
664e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
665e8d8bef9SDimitry Andric     }
666e8d8bef9SDimitry Andric 
667e8d8bef9SDimitry Andric     break;
668e8d8bef9SDimitry Andric   }
669e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_ubfe:
670e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_sbfe: {
671e8d8bef9SDimitry Andric     // Decompose simple cases into standard shifts.
672e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
673e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src)) {
674e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Src);
675e8d8bef9SDimitry Andric     }
676e8d8bef9SDimitry Andric 
677e8d8bef9SDimitry Andric     unsigned Width;
678e8d8bef9SDimitry Andric     Type *Ty = II.getType();
679e8d8bef9SDimitry Andric     unsigned IntSize = Ty->getIntegerBitWidth();
680e8d8bef9SDimitry Andric 
681e8d8bef9SDimitry Andric     ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
682e8d8bef9SDimitry Andric     if (CWidth) {
683e8d8bef9SDimitry Andric       Width = CWidth->getZExtValue();
684e8d8bef9SDimitry Andric       if ((Width & (IntSize - 1)) == 0) {
685e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
686e8d8bef9SDimitry Andric       }
687e8d8bef9SDimitry Andric 
688e8d8bef9SDimitry Andric       // Hardware ignores high bits, so remove those.
689e8d8bef9SDimitry Andric       if (Width >= IntSize) {
690e8d8bef9SDimitry Andric         return IC.replaceOperand(
691e8d8bef9SDimitry Andric             II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
692e8d8bef9SDimitry Andric       }
693e8d8bef9SDimitry Andric     }
694e8d8bef9SDimitry Andric 
695e8d8bef9SDimitry Andric     unsigned Offset;
696e8d8bef9SDimitry Andric     ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
697e8d8bef9SDimitry Andric     if (COffset) {
698e8d8bef9SDimitry Andric       Offset = COffset->getZExtValue();
699e8d8bef9SDimitry Andric       if (Offset >= IntSize) {
700e8d8bef9SDimitry Andric         return IC.replaceOperand(
701e8d8bef9SDimitry Andric             II, 1,
702e8d8bef9SDimitry Andric             ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
703e8d8bef9SDimitry Andric       }
704e8d8bef9SDimitry Andric     }
705e8d8bef9SDimitry Andric 
706e8d8bef9SDimitry Andric     bool Signed = IID == Intrinsic::amdgcn_sbfe;
707e8d8bef9SDimitry Andric 
708e8d8bef9SDimitry Andric     if (!CWidth || !COffset)
709e8d8bef9SDimitry Andric       break;
710e8d8bef9SDimitry Andric 
711349cc55cSDimitry Andric     // The case of Width == 0 is handled above, which makes this transformation
712e8d8bef9SDimitry Andric     // safe.  If Width == 0, then the ashr and lshr instructions become poison
713e8d8bef9SDimitry Andric     // value since the shift amount would be equal to the bit size.
714e8d8bef9SDimitry Andric     assert(Width != 0);
715e8d8bef9SDimitry Andric 
716e8d8bef9SDimitry Andric     // TODO: This allows folding to undef when the hardware has specific
717e8d8bef9SDimitry Andric     // behavior?
718e8d8bef9SDimitry Andric     if (Offset + Width < IntSize) {
719e8d8bef9SDimitry Andric       Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
720e8d8bef9SDimitry Andric       Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
721e8d8bef9SDimitry Andric                                  : IC.Builder.CreateLShr(Shl, IntSize - Width);
722e8d8bef9SDimitry Andric       RightShift->takeName(&II);
723e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, RightShift);
724e8d8bef9SDimitry Andric     }
725e8d8bef9SDimitry Andric 
726e8d8bef9SDimitry Andric     Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
727e8d8bef9SDimitry Andric                                : IC.Builder.CreateLShr(Src, Offset);
728e8d8bef9SDimitry Andric 
729e8d8bef9SDimitry Andric     RightShift->takeName(&II);
730e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, RightShift);
731e8d8bef9SDimitry Andric   }
732e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_exp:
73381ad6265SDimitry Andric   case Intrinsic::amdgcn_exp_row:
734e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_exp_compr: {
735e8d8bef9SDimitry Andric     ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
736e8d8bef9SDimitry Andric     unsigned EnBits = En->getZExtValue();
737e8d8bef9SDimitry Andric     if (EnBits == 0xf)
738e8d8bef9SDimitry Andric       break; // All inputs enabled.
739e8d8bef9SDimitry Andric 
740e8d8bef9SDimitry Andric     bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
741e8d8bef9SDimitry Andric     bool Changed = false;
742e8d8bef9SDimitry Andric     for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
743e8d8bef9SDimitry Andric       if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
744e8d8bef9SDimitry Andric           (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
745e8d8bef9SDimitry Andric         Value *Src = II.getArgOperand(I + 2);
746e8d8bef9SDimitry Andric         if (!isa<UndefValue>(Src)) {
747e8d8bef9SDimitry Andric           IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
748e8d8bef9SDimitry Andric           Changed = true;
749e8d8bef9SDimitry Andric         }
750e8d8bef9SDimitry Andric       }
751e8d8bef9SDimitry Andric     }
752e8d8bef9SDimitry Andric 
753e8d8bef9SDimitry Andric     if (Changed) {
754e8d8bef9SDimitry Andric       return &II;
755e8d8bef9SDimitry Andric     }
756e8d8bef9SDimitry Andric 
757e8d8bef9SDimitry Andric     break;
758e8d8bef9SDimitry Andric   }
759e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fmed3: {
760e8d8bef9SDimitry Andric     // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
761e8d8bef9SDimitry Andric     // for the shader.
762e8d8bef9SDimitry Andric 
763e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
764e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
765e8d8bef9SDimitry Andric     Value *Src2 = II.getArgOperand(2);
766e8d8bef9SDimitry Andric 
767e8d8bef9SDimitry Andric     // Checking for NaN before canonicalization provides better fidelity when
768e8d8bef9SDimitry Andric     // mapping other operations onto fmed3 since the order of operands is
769e8d8bef9SDimitry Andric     // unchanged.
770*0fca6ea1SDimitry Andric     Value *V = nullptr;
771e8d8bef9SDimitry Andric     if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
772*0fca6ea1SDimitry Andric       V = IC.Builder.CreateMinNum(Src1, Src2);
773e8d8bef9SDimitry Andric     } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
774*0fca6ea1SDimitry Andric       V = IC.Builder.CreateMinNum(Src0, Src2);
775e8d8bef9SDimitry Andric     } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
776*0fca6ea1SDimitry Andric       V = IC.Builder.CreateMaxNum(Src0, Src1);
777e8d8bef9SDimitry Andric     }
778e8d8bef9SDimitry Andric 
779*0fca6ea1SDimitry Andric     if (V) {
780*0fca6ea1SDimitry Andric       if (auto *CI = dyn_cast<CallInst>(V)) {
781*0fca6ea1SDimitry Andric         CI->copyFastMathFlags(&II);
782*0fca6ea1SDimitry Andric         CI->takeName(&II);
783*0fca6ea1SDimitry Andric       }
784*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, V);
785e8d8bef9SDimitry Andric     }
786e8d8bef9SDimitry Andric 
787e8d8bef9SDimitry Andric     bool Swap = false;
788e8d8bef9SDimitry Andric     // Canonicalize constants to RHS operands.
789e8d8bef9SDimitry Andric     //
790e8d8bef9SDimitry Andric     // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
791e8d8bef9SDimitry Andric     if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
792e8d8bef9SDimitry Andric       std::swap(Src0, Src1);
793e8d8bef9SDimitry Andric       Swap = true;
794e8d8bef9SDimitry Andric     }
795e8d8bef9SDimitry Andric 
796e8d8bef9SDimitry Andric     if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
797e8d8bef9SDimitry Andric       std::swap(Src1, Src2);
798e8d8bef9SDimitry Andric       Swap = true;
799e8d8bef9SDimitry Andric     }
800e8d8bef9SDimitry Andric 
801e8d8bef9SDimitry Andric     if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
802e8d8bef9SDimitry Andric       std::swap(Src0, Src1);
803e8d8bef9SDimitry Andric       Swap = true;
804e8d8bef9SDimitry Andric     }
805e8d8bef9SDimitry Andric 
806e8d8bef9SDimitry Andric     if (Swap) {
807e8d8bef9SDimitry Andric       II.setArgOperand(0, Src0);
808e8d8bef9SDimitry Andric       II.setArgOperand(1, Src1);
809e8d8bef9SDimitry Andric       II.setArgOperand(2, Src2);
810e8d8bef9SDimitry Andric       return &II;
811e8d8bef9SDimitry Andric     }
812e8d8bef9SDimitry Andric 
813e8d8bef9SDimitry Andric     if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
814e8d8bef9SDimitry Andric       if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
815e8d8bef9SDimitry Andric         if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
816e8d8bef9SDimitry Andric           APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
817e8d8bef9SDimitry Andric                                        C2->getValueAPF());
818e8d8bef9SDimitry Andric           return IC.replaceInstUsesWith(
819e8d8bef9SDimitry Andric               II, ConstantFP::get(IC.Builder.getContext(), Result));
820e8d8bef9SDimitry Andric         }
821e8d8bef9SDimitry Andric       }
822e8d8bef9SDimitry Andric     }
823e8d8bef9SDimitry Andric 
82406c3fb27SDimitry Andric     if (!ST->hasMed3_16())
82506c3fb27SDimitry Andric       break;
82606c3fb27SDimitry Andric 
82706c3fb27SDimitry Andric     Value *X, *Y, *Z;
82806c3fb27SDimitry Andric 
82906c3fb27SDimitry Andric     // Repeat floating-point width reduction done for minnum/maxnum.
83006c3fb27SDimitry Andric     // fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z))
83106c3fb27SDimitry Andric     if (matchFPExtFromF16(Src0, X) && matchFPExtFromF16(Src1, Y) &&
83206c3fb27SDimitry Andric         matchFPExtFromF16(Src2, Z)) {
83306c3fb27SDimitry Andric       Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()},
83406c3fb27SDimitry Andric                                                   {X, Y, Z}, &II, II.getName());
83506c3fb27SDimitry Andric       return new FPExtInst(NewCall, II.getType());
83606c3fb27SDimitry Andric     }
83706c3fb27SDimitry Andric 
838e8d8bef9SDimitry Andric     break;
839e8d8bef9SDimitry Andric   }
840e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_icmp:
841e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fcmp: {
842e8d8bef9SDimitry Andric     const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
843e8d8bef9SDimitry Andric     // Guard against invalid arguments.
844e8d8bef9SDimitry Andric     int64_t CCVal = CC->getZExtValue();
845e8d8bef9SDimitry Andric     bool IsInteger = IID == Intrinsic::amdgcn_icmp;
846e8d8bef9SDimitry Andric     if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
847e8d8bef9SDimitry Andric                        CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
848e8d8bef9SDimitry Andric         (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
849e8d8bef9SDimitry Andric                         CCVal > CmpInst::LAST_FCMP_PREDICATE)))
850e8d8bef9SDimitry Andric       break;
851e8d8bef9SDimitry Andric 
852e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
853e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
854e8d8bef9SDimitry Andric 
855e8d8bef9SDimitry Andric     if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
856e8d8bef9SDimitry Andric       if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
857*0fca6ea1SDimitry Andric         Constant *CCmp = ConstantFoldCompareInstOperands(
858*0fca6ea1SDimitry Andric             (ICmpInst::Predicate)CCVal, CSrc0, CSrc1, DL);
859*0fca6ea1SDimitry Andric         if (CCmp && CCmp->isNullValue()) {
860e8d8bef9SDimitry Andric           return IC.replaceInstUsesWith(
8615f757f3fSDimitry Andric               II, IC.Builder.CreateSExt(CCmp, II.getType()));
862e8d8bef9SDimitry Andric         }
863e8d8bef9SDimitry Andric 
864e8d8bef9SDimitry Andric         // The result of V_ICMP/V_FCMP assembly instructions (which this
865e8d8bef9SDimitry Andric         // intrinsic exposes) is one bit per thread, masked with the EXEC
866e8d8bef9SDimitry Andric         // register (which contains the bitmask of live threads). So a
867e8d8bef9SDimitry Andric         // comparison that always returns true is the same as a read of the
868e8d8bef9SDimitry Andric         // EXEC register.
869e8d8bef9SDimitry Andric         Function *NewF = Intrinsic::getDeclaration(
870e8d8bef9SDimitry Andric             II.getModule(), Intrinsic::read_register, II.getType());
871e8d8bef9SDimitry Andric         Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
872e8d8bef9SDimitry Andric         MDNode *MD = MDNode::get(II.getContext(), MDArgs);
873e8d8bef9SDimitry Andric         Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
874e8d8bef9SDimitry Andric         CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
875349cc55cSDimitry Andric         NewCall->addFnAttr(Attribute::Convergent);
876e8d8bef9SDimitry Andric         NewCall->takeName(&II);
877e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, NewCall);
878e8d8bef9SDimitry Andric       }
879e8d8bef9SDimitry Andric 
880e8d8bef9SDimitry Andric       // Canonicalize constants to RHS.
881e8d8bef9SDimitry Andric       CmpInst::Predicate SwapPred =
882e8d8bef9SDimitry Andric           CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
883e8d8bef9SDimitry Andric       II.setArgOperand(0, Src1);
884e8d8bef9SDimitry Andric       II.setArgOperand(1, Src0);
885e8d8bef9SDimitry Andric       II.setArgOperand(
886e8d8bef9SDimitry Andric           2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
887e8d8bef9SDimitry Andric       return &II;
888e8d8bef9SDimitry Andric     }
889e8d8bef9SDimitry Andric 
890e8d8bef9SDimitry Andric     if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
891e8d8bef9SDimitry Andric       break;
892e8d8bef9SDimitry Andric 
893e8d8bef9SDimitry Andric     // Canonicalize compare eq with true value to compare != 0
894e8d8bef9SDimitry Andric     // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
895e8d8bef9SDimitry Andric     //   -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
896e8d8bef9SDimitry Andric     // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
897e8d8bef9SDimitry Andric     //   -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
898e8d8bef9SDimitry Andric     Value *ExtSrc;
899e8d8bef9SDimitry Andric     if (CCVal == CmpInst::ICMP_EQ &&
900e8d8bef9SDimitry Andric         ((match(Src1, PatternMatch::m_One()) &&
901e8d8bef9SDimitry Andric           match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
902e8d8bef9SDimitry Andric          (match(Src1, PatternMatch::m_AllOnes()) &&
903e8d8bef9SDimitry Andric           match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
904e8d8bef9SDimitry Andric         ExtSrc->getType()->isIntegerTy(1)) {
905e8d8bef9SDimitry Andric       IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
906e8d8bef9SDimitry Andric       IC.replaceOperand(II, 2,
907e8d8bef9SDimitry Andric                         ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
908e8d8bef9SDimitry Andric       return &II;
909e8d8bef9SDimitry Andric     }
910e8d8bef9SDimitry Andric 
911e8d8bef9SDimitry Andric     CmpInst::Predicate SrcPred;
912e8d8bef9SDimitry Andric     Value *SrcLHS;
913e8d8bef9SDimitry Andric     Value *SrcRHS;
914e8d8bef9SDimitry Andric 
915e8d8bef9SDimitry Andric     // Fold compare eq/ne with 0 from a compare result as the predicate to the
916e8d8bef9SDimitry Andric     // intrinsic. The typical use is a wave vote function in the library, which
917e8d8bef9SDimitry Andric     // will be fed from a user code condition compared with 0. Fold in the
918e8d8bef9SDimitry Andric     // redundant compare.
919e8d8bef9SDimitry Andric 
920e8d8bef9SDimitry Andric     // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
921e8d8bef9SDimitry Andric     //   -> llvm.amdgcn.[if]cmp(a, b, pred)
922e8d8bef9SDimitry Andric     //
923e8d8bef9SDimitry Andric     // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
924e8d8bef9SDimitry Andric     //   -> llvm.amdgcn.[if]cmp(a, b, inv pred)
925e8d8bef9SDimitry Andric     if (match(Src1, PatternMatch::m_Zero()) &&
926e8d8bef9SDimitry Andric         match(Src0, PatternMatch::m_ZExtOrSExt(
927e8d8bef9SDimitry Andric                         m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
928e8d8bef9SDimitry Andric                               PatternMatch::m_Value(SrcRHS))))) {
929e8d8bef9SDimitry Andric       if (CCVal == CmpInst::ICMP_EQ)
930e8d8bef9SDimitry Andric         SrcPred = CmpInst::getInversePredicate(SrcPred);
931e8d8bef9SDimitry Andric 
932e8d8bef9SDimitry Andric       Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
933e8d8bef9SDimitry Andric                                  ? Intrinsic::amdgcn_fcmp
934e8d8bef9SDimitry Andric                                  : Intrinsic::amdgcn_icmp;
935e8d8bef9SDimitry Andric 
936e8d8bef9SDimitry Andric       Type *Ty = SrcLHS->getType();
937e8d8bef9SDimitry Andric       if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
938e8d8bef9SDimitry Andric         // Promote to next legal integer type.
939e8d8bef9SDimitry Andric         unsigned Width = CmpType->getBitWidth();
940e8d8bef9SDimitry Andric         unsigned NewWidth = Width;
941e8d8bef9SDimitry Andric 
942e8d8bef9SDimitry Andric         // Don't do anything for i1 comparisons.
943e8d8bef9SDimitry Andric         if (Width == 1)
944e8d8bef9SDimitry Andric           break;
945e8d8bef9SDimitry Andric 
946e8d8bef9SDimitry Andric         if (Width <= 16)
947e8d8bef9SDimitry Andric           NewWidth = 16;
948e8d8bef9SDimitry Andric         else if (Width <= 32)
949e8d8bef9SDimitry Andric           NewWidth = 32;
950e8d8bef9SDimitry Andric         else if (Width <= 64)
951e8d8bef9SDimitry Andric           NewWidth = 64;
952*0fca6ea1SDimitry Andric         else
953e8d8bef9SDimitry Andric           break; // Can't handle this.
954e8d8bef9SDimitry Andric 
955e8d8bef9SDimitry Andric         if (Width != NewWidth) {
956e8d8bef9SDimitry Andric           IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
957e8d8bef9SDimitry Andric           if (CmpInst::isSigned(SrcPred)) {
958e8d8bef9SDimitry Andric             SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
959e8d8bef9SDimitry Andric             SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
960e8d8bef9SDimitry Andric           } else {
961e8d8bef9SDimitry Andric             SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
962e8d8bef9SDimitry Andric             SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
963e8d8bef9SDimitry Andric           }
964e8d8bef9SDimitry Andric         }
965e8d8bef9SDimitry Andric       } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
966e8d8bef9SDimitry Andric         break;
967e8d8bef9SDimitry Andric 
968e8d8bef9SDimitry Andric       Function *NewF = Intrinsic::getDeclaration(
969e8d8bef9SDimitry Andric           II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
970e8d8bef9SDimitry Andric       Value *Args[] = {SrcLHS, SrcRHS,
971e8d8bef9SDimitry Andric                        ConstantInt::get(CC->getType(), SrcPred)};
972e8d8bef9SDimitry Andric       CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
973e8d8bef9SDimitry Andric       NewCall->takeName(&II);
974e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, NewCall);
975e8d8bef9SDimitry Andric     }
976e8d8bef9SDimitry Andric 
977e8d8bef9SDimitry Andric     break;
978e8d8bef9SDimitry Andric   }
97906c3fb27SDimitry Andric   case Intrinsic::amdgcn_mbcnt_hi: {
98006c3fb27SDimitry Andric     // exec_hi is all 0, so this is just a copy.
98106c3fb27SDimitry Andric     if (ST->isWave32())
98206c3fb27SDimitry Andric       return IC.replaceInstUsesWith(II, II.getArgOperand(1));
98306c3fb27SDimitry Andric     break;
98406c3fb27SDimitry Andric   }
985e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_ballot: {
986e8d8bef9SDimitry Andric     if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
987e8d8bef9SDimitry Andric       if (Src->isZero()) {
988e8d8bef9SDimitry Andric         // amdgcn.ballot(i1 0) is zero.
989e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
990e8d8bef9SDimitry Andric       }
991e8d8bef9SDimitry Andric     }
992*0fca6ea1SDimitry Andric     if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
993*0fca6ea1SDimitry Andric       // %b64 = call i64 ballot.i64(...)
994*0fca6ea1SDimitry Andric       // =>
995*0fca6ea1SDimitry Andric       // %b32 = call i32 ballot.i32(...)
996*0fca6ea1SDimitry Andric       // %b64 = zext i32 %b32 to i64
997*0fca6ea1SDimitry Andric       Value *Call = IC.Builder.CreateZExt(
998*0fca6ea1SDimitry Andric           IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
999*0fca6ea1SDimitry Andric                                      {IC.Builder.getInt32Ty()},
1000*0fca6ea1SDimitry Andric                                      {II.getArgOperand(0)}),
1001*0fca6ea1SDimitry Andric           II.getType());
1002*0fca6ea1SDimitry Andric       Call->takeName(&II);
1003*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, Call);
1004*0fca6ea1SDimitry Andric     }
1005e8d8bef9SDimitry Andric     break;
1006e8d8bef9SDimitry Andric   }
1007e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_wqm_vote: {
1008e8d8bef9SDimitry Andric     // wqm_vote is identity when the argument is constant.
1009e8d8bef9SDimitry Andric     if (!isa<Constant>(II.getArgOperand(0)))
1010e8d8bef9SDimitry Andric       break;
1011e8d8bef9SDimitry Andric 
1012e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, II.getArgOperand(0));
1013e8d8bef9SDimitry Andric   }
1014e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_kill: {
1015e8d8bef9SDimitry Andric     const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
1016e8d8bef9SDimitry Andric     if (!C || !C->getZExtValue())
1017e8d8bef9SDimitry Andric       break;
1018e8d8bef9SDimitry Andric 
1019e8d8bef9SDimitry Andric     // amdgcn.kill(i1 1) is a no-op
1020e8d8bef9SDimitry Andric     return IC.eraseInstFromFunction(II);
1021e8d8bef9SDimitry Andric   }
1022e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_update_dpp: {
1023e8d8bef9SDimitry Andric     Value *Old = II.getArgOperand(0);
1024e8d8bef9SDimitry Andric 
1025e8d8bef9SDimitry Andric     auto *BC = cast<ConstantInt>(II.getArgOperand(5));
1026e8d8bef9SDimitry Andric     auto *RM = cast<ConstantInt>(II.getArgOperand(3));
1027e8d8bef9SDimitry Andric     auto *BM = cast<ConstantInt>(II.getArgOperand(4));
1028e8d8bef9SDimitry Andric     if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
1029e8d8bef9SDimitry Andric         BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
1030e8d8bef9SDimitry Andric       break;
1031e8d8bef9SDimitry Andric 
1032e8d8bef9SDimitry Andric     // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
1033e8d8bef9SDimitry Andric     return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
1034e8d8bef9SDimitry Andric   }
1035e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_permlane16:
10365f757f3fSDimitry Andric   case Intrinsic::amdgcn_permlane16_var:
10375f757f3fSDimitry Andric   case Intrinsic::amdgcn_permlanex16:
10385f757f3fSDimitry Andric   case Intrinsic::amdgcn_permlanex16_var: {
1039e8d8bef9SDimitry Andric     // Discard vdst_in if it's not going to be read.
1040e8d8bef9SDimitry Andric     Value *VDstIn = II.getArgOperand(0);
1041e8d8bef9SDimitry Andric     if (isa<UndefValue>(VDstIn))
1042e8d8bef9SDimitry Andric       break;
1043e8d8bef9SDimitry Andric 
10445f757f3fSDimitry Andric     // FetchInvalid operand idx.
10455f757f3fSDimitry Andric     unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
10465f757f3fSDimitry Andric                           IID == Intrinsic::amdgcn_permlanex16)
10475f757f3fSDimitry Andric                              ? 4  /* for permlane16 and permlanex16 */
10485f757f3fSDimitry Andric                              : 3; /* for permlane16_var and permlanex16_var */
10495f757f3fSDimitry Andric 
10505f757f3fSDimitry Andric     // BoundCtrl operand idx.
10515f757f3fSDimitry Andric     // For permlane16 and permlanex16 it should be 5
10525f757f3fSDimitry Andric     // For Permlane16_var and permlanex16_var it should be 4
10535f757f3fSDimitry Andric     unsigned int BcIdx = FiIdx + 1;
10545f757f3fSDimitry Andric 
10555f757f3fSDimitry Andric     ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(FiIdx));
10565f757f3fSDimitry Andric     ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(BcIdx));
1057e8d8bef9SDimitry Andric     if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
1058e8d8bef9SDimitry Andric       break;
1059e8d8bef9SDimitry Andric 
1060e8d8bef9SDimitry Andric     return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
1061e8d8bef9SDimitry Andric   }
106281ad6265SDimitry Andric   case Intrinsic::amdgcn_permlane64:
106381ad6265SDimitry Andric     // A constant value is trivially uniform.
106481ad6265SDimitry Andric     if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
106581ad6265SDimitry Andric       return IC.replaceInstUsesWith(II, C);
106681ad6265SDimitry Andric     }
106781ad6265SDimitry Andric     break;
1068e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_readfirstlane:
1069e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_readlane: {
1070e8d8bef9SDimitry Andric     // A constant value is trivially uniform.
1071e8d8bef9SDimitry Andric     if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
1072e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, C);
1073e8d8bef9SDimitry Andric     }
1074e8d8bef9SDimitry Andric 
1075e8d8bef9SDimitry Andric     // The rest of these may not be safe if the exec may not be the same between
1076e8d8bef9SDimitry Andric     // the def and use.
1077e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
1078e8d8bef9SDimitry Andric     Instruction *SrcInst = dyn_cast<Instruction>(Src);
1079e8d8bef9SDimitry Andric     if (SrcInst && SrcInst->getParent() != II.getParent())
1080e8d8bef9SDimitry Andric       break;
1081e8d8bef9SDimitry Andric 
1082e8d8bef9SDimitry Andric     // readfirstlane (readfirstlane x) -> readfirstlane x
1083e8d8bef9SDimitry Andric     // readlane (readfirstlane x), y -> readfirstlane x
1084e8d8bef9SDimitry Andric     if (match(Src,
1085e8d8bef9SDimitry Andric               PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
1086e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Src);
1087e8d8bef9SDimitry Andric     }
1088e8d8bef9SDimitry Andric 
1089e8d8bef9SDimitry Andric     if (IID == Intrinsic::amdgcn_readfirstlane) {
1090e8d8bef9SDimitry Andric       // readfirstlane (readlane x, y) -> readlane x, y
1091e8d8bef9SDimitry Andric       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
1092e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Src);
1093e8d8bef9SDimitry Andric       }
1094e8d8bef9SDimitry Andric     } else {
1095e8d8bef9SDimitry Andric       // readlane (readlane x, y), y -> readlane x, y
1096e8d8bef9SDimitry Andric       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
1097e8d8bef9SDimitry Andric                          PatternMatch::m_Value(),
1098e8d8bef9SDimitry Andric                          PatternMatch::m_Specific(II.getArgOperand(1))))) {
1099e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Src);
1100e8d8bef9SDimitry Andric       }
1101e8d8bef9SDimitry Andric     }
1102e8d8bef9SDimitry Andric 
1103e8d8bef9SDimitry Andric     break;
1104e8d8bef9SDimitry Andric   }
1105*0fca6ea1SDimitry Andric   case Intrinsic::amdgcn_trig_preop: {
1106*0fca6ea1SDimitry Andric     // The intrinsic is declared with name mangling, but currently the
1107*0fca6ea1SDimitry Andric     // instruction only exists for f64
1108*0fca6ea1SDimitry Andric     if (!II.getType()->isDoubleTy())
1109*0fca6ea1SDimitry Andric       break;
1110*0fca6ea1SDimitry Andric 
1111*0fca6ea1SDimitry Andric     Value *Src = II.getArgOperand(0);
1112*0fca6ea1SDimitry Andric     Value *Segment = II.getArgOperand(1);
1113*0fca6ea1SDimitry Andric     if (isa<PoisonValue>(Src) || isa<PoisonValue>(Segment))
1114*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
1115*0fca6ea1SDimitry Andric 
1116*0fca6ea1SDimitry Andric     if (isa<UndefValue>(Src)) {
1117*0fca6ea1SDimitry Andric       auto *QNaN = ConstantFP::get(
1118*0fca6ea1SDimitry Andric           II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics()));
1119*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, QNaN);
1120*0fca6ea1SDimitry Andric     }
1121*0fca6ea1SDimitry Andric 
1122*0fca6ea1SDimitry Andric     const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
1123*0fca6ea1SDimitry Andric     if (!Csrc)
1124*0fca6ea1SDimitry Andric       break;
1125*0fca6ea1SDimitry Andric 
1126*0fca6ea1SDimitry Andric     if (II.isStrictFP())
1127*0fca6ea1SDimitry Andric       break;
1128*0fca6ea1SDimitry Andric 
1129*0fca6ea1SDimitry Andric     const APFloat &Fsrc = Csrc->getValueAPF();
1130*0fca6ea1SDimitry Andric     if (Fsrc.isNaN()) {
1131*0fca6ea1SDimitry Andric       auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
1132*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, Quieted);
1133*0fca6ea1SDimitry Andric     }
1134*0fca6ea1SDimitry Andric 
1135*0fca6ea1SDimitry Andric     const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
1136*0fca6ea1SDimitry Andric     if (!Cseg)
1137*0fca6ea1SDimitry Andric       break;
1138*0fca6ea1SDimitry Andric 
1139*0fca6ea1SDimitry Andric     unsigned Exponent = (Fsrc.bitcastToAPInt().getZExtValue() >> 52) & 0x7ff;
1140*0fca6ea1SDimitry Andric     unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue();
1141*0fca6ea1SDimitry Andric     unsigned Shift = SegmentVal * 53;
1142*0fca6ea1SDimitry Andric     if (Exponent > 1077)
1143*0fca6ea1SDimitry Andric       Shift += Exponent - 1077;
1144*0fca6ea1SDimitry Andric 
1145*0fca6ea1SDimitry Andric     // 2.0/PI table.
1146*0fca6ea1SDimitry Andric     static const uint32_t TwoByPi[] = {
1147*0fca6ea1SDimitry Andric         0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1148*0fca6ea1SDimitry Andric         0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1149*0fca6ea1SDimitry Andric         0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1150*0fca6ea1SDimitry Andric         0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1151*0fca6ea1SDimitry Andric         0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1152*0fca6ea1SDimitry Andric         0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1153*0fca6ea1SDimitry Andric         0x56033046};
1154*0fca6ea1SDimitry Andric 
1155*0fca6ea1SDimitry Andric     // Return 0 for outbound segment (hardware behavior).
1156*0fca6ea1SDimitry Andric     unsigned Idx = Shift >> 5;
1157*0fca6ea1SDimitry Andric     if (Idx + 2 >= std::size(TwoByPi)) {
1158*0fca6ea1SDimitry Andric       APFloat Zero = APFloat::getZero(II.getType()->getFltSemantics());
1159*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantFP::get(II.getType(), Zero));
1160*0fca6ea1SDimitry Andric     }
1161*0fca6ea1SDimitry Andric 
1162*0fca6ea1SDimitry Andric     unsigned BShift = Shift & 0x1f;
1163*0fca6ea1SDimitry Andric     uint64_t Thi = Make_64(TwoByPi[Idx], TwoByPi[Idx + 1]);
1164*0fca6ea1SDimitry Andric     uint64_t Tlo = Make_64(TwoByPi[Idx + 2], 0);
1165*0fca6ea1SDimitry Andric     if (BShift)
1166*0fca6ea1SDimitry Andric       Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1167*0fca6ea1SDimitry Andric     Thi = Thi >> 11;
1168*0fca6ea1SDimitry Andric     APFloat Result = APFloat((double)Thi);
1169*0fca6ea1SDimitry Andric 
1170*0fca6ea1SDimitry Andric     int Scale = -53 - Shift;
1171*0fca6ea1SDimitry Andric     if (Exponent >= 1968)
1172*0fca6ea1SDimitry Andric       Scale += 128;
1173*0fca6ea1SDimitry Andric 
1174*0fca6ea1SDimitry Andric     Result = scalbn(Result, Scale, RoundingMode::NearestTiesToEven);
1175*0fca6ea1SDimitry Andric     return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Result));
1176*0fca6ea1SDimitry Andric   }
1177e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fmul_legacy: {
1178e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
1179e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
1180e8d8bef9SDimitry Andric 
1181e8d8bef9SDimitry Andric     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1182e8d8bef9SDimitry Andric     // infinity, gives +0.0.
1183e8d8bef9SDimitry Andric     // TODO: Move to InstSimplify?
1184e8d8bef9SDimitry Andric     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1185e8d8bef9SDimitry Andric         match(Op1, PatternMatch::m_AnyZeroFP()))
118606c3fb27SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
1187e8d8bef9SDimitry Andric 
1188e8d8bef9SDimitry Andric     // If we can prove we don't have one of the special cases then we can use a
1189e8d8bef9SDimitry Andric     // normal fmul instruction instead.
119006c3fb27SDimitry Andric     if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
1191e8d8bef9SDimitry Andric       auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
1192e8d8bef9SDimitry Andric       FMul->takeName(&II);
1193e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, FMul);
1194e8d8bef9SDimitry Andric     }
1195e8d8bef9SDimitry Andric     break;
1196e8d8bef9SDimitry Andric   }
1197e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fma_legacy: {
1198e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
1199e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
1200e8d8bef9SDimitry Andric     Value *Op2 = II.getArgOperand(2);
1201e8d8bef9SDimitry Andric 
1202e8d8bef9SDimitry Andric     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1203e8d8bef9SDimitry Andric     // infinity, gives +0.0.
1204e8d8bef9SDimitry Andric     // TODO: Move to InstSimplify?
1205e8d8bef9SDimitry Andric     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1206e8d8bef9SDimitry Andric         match(Op1, PatternMatch::m_AnyZeroFP())) {
1207e8d8bef9SDimitry Andric       // It's tempting to just return Op2 here, but that would give the wrong
1208e8d8bef9SDimitry Andric       // result if Op2 was -0.0.
120906c3fb27SDimitry Andric       auto *Zero = ConstantFP::getZero(II.getType());
1210e8d8bef9SDimitry Andric       auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
1211e8d8bef9SDimitry Andric       FAdd->takeName(&II);
1212e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, FAdd);
1213e8d8bef9SDimitry Andric     }
1214e8d8bef9SDimitry Andric 
1215e8d8bef9SDimitry Andric     // If we can prove we don't have one of the special cases then we can use a
1216e8d8bef9SDimitry Andric     // normal fma instead.
121706c3fb27SDimitry Andric     if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
1218e8d8bef9SDimitry Andric       II.setCalledOperand(Intrinsic::getDeclaration(
1219e8d8bef9SDimitry Andric           II.getModule(), Intrinsic::fma, II.getType()));
1220e8d8bef9SDimitry Andric       return &II;
1221e8d8bef9SDimitry Andric     }
1222e8d8bef9SDimitry Andric     break;
1223e8d8bef9SDimitry Andric   }
12240eae32dcSDimitry Andric   case Intrinsic::amdgcn_is_shared:
12250eae32dcSDimitry Andric   case Intrinsic::amdgcn_is_private: {
12260eae32dcSDimitry Andric     if (isa<UndefValue>(II.getArgOperand(0)))
12270eae32dcSDimitry Andric       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
12280eae32dcSDimitry Andric 
12290eae32dcSDimitry Andric     if (isa<ConstantPointerNull>(II.getArgOperand(0)))
12300eae32dcSDimitry Andric       return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
12310eae32dcSDimitry Andric     break;
12320eae32dcSDimitry Andric   }
123306c3fb27SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_store_format:
123406c3fb27SDimitry Andric   case Intrinsic::amdgcn_struct_buffer_store_format:
123506c3fb27SDimitry Andric   case Intrinsic::amdgcn_raw_tbuffer_store:
123606c3fb27SDimitry Andric   case Intrinsic::amdgcn_struct_tbuffer_store:
123706c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_1d:
123806c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_1darray:
123906c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_2d:
124006c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_2darray:
124106c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_2darraymsaa:
124206c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_2dmsaa:
124306c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_3d:
124406c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_cube:
124506c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_mip_1d:
124606c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_mip_1darray:
124706c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_mip_2d:
124806c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_mip_2darray:
124906c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_mip_3d:
125006c3fb27SDimitry Andric   case Intrinsic::amdgcn_image_store_mip_cube: {
125106c3fb27SDimitry Andric     if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
125206c3fb27SDimitry Andric       break;
125306c3fb27SDimitry Andric 
12547a6dacacSDimitry Andric     APInt DemandedElts;
12557a6dacacSDimitry Andric     if (ST->hasDefaultComponentBroadcast())
12567a6dacacSDimitry Andric       DemandedElts = defaultComponentBroadcast(II.getArgOperand(0));
12577a6dacacSDimitry Andric     else if (ST->hasDefaultComponentZero())
12587a6dacacSDimitry Andric       DemandedElts = trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
12597a6dacacSDimitry Andric     else
12607a6dacacSDimitry Andric       break;
126106c3fb27SDimitry Andric 
126206c3fb27SDimitry Andric     int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
126306c3fb27SDimitry Andric     if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
126406c3fb27SDimitry Andric                                               false)) {
126506c3fb27SDimitry Andric       return IC.eraseInstFromFunction(II);
126606c3fb27SDimitry Andric     }
126706c3fb27SDimitry Andric 
126806c3fb27SDimitry Andric     break;
126906c3fb27SDimitry Andric   }
127006c3fb27SDimitry Andric   }
1271e8d8bef9SDimitry Andric   if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1272e8d8bef9SDimitry Andric             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
1273e8d8bef9SDimitry Andric     return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
1274e8d8bef9SDimitry Andric   }
1275bdd1243dSDimitry Andric   return std::nullopt;
1276e8d8bef9SDimitry Andric }
1277e8d8bef9SDimitry Andric 
1278e8d8bef9SDimitry Andric /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
1279e8d8bef9SDimitry Andric ///
128006c3fb27SDimitry Andric /// The result of simplifying amdgcn image and buffer store intrinsics is updating
128106c3fb27SDimitry Andric /// definitions of the intrinsics vector argument, not Uses of the result like
128206c3fb27SDimitry Andric /// image and buffer loads.
1283e8d8bef9SDimitry Andric /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
1284e8d8bef9SDimitry Andric ///       struct returns.
1285e8d8bef9SDimitry Andric static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
1286e8d8bef9SDimitry Andric                                                     IntrinsicInst &II,
1287e8d8bef9SDimitry Andric                                                     APInt DemandedElts,
128806c3fb27SDimitry Andric                                                     int DMaskIdx, bool IsLoad) {
1289e8d8bef9SDimitry Andric 
129006c3fb27SDimitry Andric   auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
129106c3fb27SDimitry Andric                                              : II.getOperand(0)->getType());
1292e8d8bef9SDimitry Andric   unsigned VWidth = IIVTy->getNumElements();
1293e8d8bef9SDimitry Andric   if (VWidth == 1)
1294e8d8bef9SDimitry Andric     return nullptr;
1295bdd1243dSDimitry Andric   Type *EltTy = IIVTy->getElementType();
1296e8d8bef9SDimitry Andric 
1297e8d8bef9SDimitry Andric   IRBuilderBase::InsertPointGuard Guard(IC.Builder);
1298e8d8bef9SDimitry Andric   IC.Builder.SetInsertPoint(&II);
1299e8d8bef9SDimitry Andric 
1300e8d8bef9SDimitry Andric   // Assume the arguments are unchanged and later override them, if needed.
1301e8d8bef9SDimitry Andric   SmallVector<Value *, 16> Args(II.args());
1302e8d8bef9SDimitry Andric 
1303e8d8bef9SDimitry Andric   if (DMaskIdx < 0) {
1304e8d8bef9SDimitry Andric     // Buffer case.
1305e8d8bef9SDimitry Andric 
1306e8d8bef9SDimitry Andric     const unsigned ActiveBits = DemandedElts.getActiveBits();
130706c3fb27SDimitry Andric     const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();
1308e8d8bef9SDimitry Andric 
1309e8d8bef9SDimitry Andric     // Start assuming the prefix of elements is demanded, but possibly clear
1310e8d8bef9SDimitry Andric     // some other bits if there are trailing zeros (unused components at front)
1311e8d8bef9SDimitry Andric     // and update offset.
1312e8d8bef9SDimitry Andric     DemandedElts = (1 << ActiveBits) - 1;
1313e8d8bef9SDimitry Andric 
1314e8d8bef9SDimitry Andric     if (UnusedComponentsAtFront > 0) {
1315e8d8bef9SDimitry Andric       static const unsigned InvalidOffsetIdx = 0xf;
1316e8d8bef9SDimitry Andric 
1317e8d8bef9SDimitry Andric       unsigned OffsetIdx;
1318e8d8bef9SDimitry Andric       switch (II.getIntrinsicID()) {
1319e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_raw_buffer_load:
132006c3fb27SDimitry Andric       case Intrinsic::amdgcn_raw_ptr_buffer_load:
1321e8d8bef9SDimitry Andric         OffsetIdx = 1;
1322e8d8bef9SDimitry Andric         break;
1323e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_s_buffer_load:
1324e8d8bef9SDimitry Andric         // If resulting type is vec3, there is no point in trimming the
1325e8d8bef9SDimitry Andric         // load with updated offset, as the vec3 would most likely be widened to
1326e8d8bef9SDimitry Andric         // vec4 anyway during lowering.
1327e8d8bef9SDimitry Andric         if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1328e8d8bef9SDimitry Andric           OffsetIdx = InvalidOffsetIdx;
1329e8d8bef9SDimitry Andric         else
1330e8d8bef9SDimitry Andric           OffsetIdx = 1;
1331e8d8bef9SDimitry Andric         break;
1332e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_struct_buffer_load:
133306c3fb27SDimitry Andric       case Intrinsic::amdgcn_struct_ptr_buffer_load:
1334e8d8bef9SDimitry Andric         OffsetIdx = 2;
1335e8d8bef9SDimitry Andric         break;
1336e8d8bef9SDimitry Andric       default:
1337e8d8bef9SDimitry Andric         // TODO: handle tbuffer* intrinsics.
1338e8d8bef9SDimitry Andric         OffsetIdx = InvalidOffsetIdx;
1339e8d8bef9SDimitry Andric         break;
1340e8d8bef9SDimitry Andric       }
1341e8d8bef9SDimitry Andric 
1342e8d8bef9SDimitry Andric       if (OffsetIdx != InvalidOffsetIdx) {
1343e8d8bef9SDimitry Andric         // Clear demanded bits and update the offset.
1344e8d8bef9SDimitry Andric         DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1345bdd1243dSDimitry Andric         auto *Offset = Args[OffsetIdx];
1346e8d8bef9SDimitry Andric         unsigned SingleComponentSizeInBits =
1347bdd1243dSDimitry Andric             IC.getDataLayout().getTypeSizeInBits(EltTy);
1348e8d8bef9SDimitry Andric         unsigned OffsetAdd =
1349e8d8bef9SDimitry Andric             UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1350e8d8bef9SDimitry Andric         auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1351e8d8bef9SDimitry Andric         Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
1352e8d8bef9SDimitry Andric       }
1353e8d8bef9SDimitry Andric     }
1354e8d8bef9SDimitry Andric   } else {
1355e8d8bef9SDimitry Andric     // Image case.
1356e8d8bef9SDimitry Andric 
1357bdd1243dSDimitry Andric     ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]);
1358e8d8bef9SDimitry Andric     unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1359e8d8bef9SDimitry Andric 
1360cb14a3feSDimitry Andric     // dmask 0 has special semantics, do not simplify.
1361cb14a3feSDimitry Andric     if (DMaskVal == 0)
1362cb14a3feSDimitry Andric       return nullptr;
1363cb14a3feSDimitry Andric 
1364e8d8bef9SDimitry Andric     // Mask off values that are undefined because the dmask doesn't cover them
1365bdd1243dSDimitry Andric     DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
1366e8d8bef9SDimitry Andric 
1367e8d8bef9SDimitry Andric     unsigned NewDMaskVal = 0;
136806c3fb27SDimitry Andric     unsigned OrigLdStIdx = 0;
1369e8d8bef9SDimitry Andric     for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1370e8d8bef9SDimitry Andric       const unsigned Bit = 1 << SrcIdx;
1371e8d8bef9SDimitry Andric       if (!!(DMaskVal & Bit)) {
137206c3fb27SDimitry Andric         if (!!DemandedElts[OrigLdStIdx])
1373e8d8bef9SDimitry Andric           NewDMaskVal |= Bit;
137406c3fb27SDimitry Andric         OrigLdStIdx++;
1375e8d8bef9SDimitry Andric       }
1376e8d8bef9SDimitry Andric     }
1377e8d8bef9SDimitry Andric 
1378e8d8bef9SDimitry Andric     if (DMaskVal != NewDMaskVal)
1379e8d8bef9SDimitry Andric       Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1380e8d8bef9SDimitry Andric   }
1381e8d8bef9SDimitry Andric 
138206c3fb27SDimitry Andric   unsigned NewNumElts = DemandedElts.popcount();
1383e8d8bef9SDimitry Andric   if (!NewNumElts)
1384cb14a3feSDimitry Andric     return PoisonValue::get(IIVTy);
1385e8d8bef9SDimitry Andric 
1386e8d8bef9SDimitry Andric   if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1387e8d8bef9SDimitry Andric     if (DMaskIdx >= 0)
1388e8d8bef9SDimitry Andric       II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1389e8d8bef9SDimitry Andric     return nullptr;
1390e8d8bef9SDimitry Andric   }
1391e8d8bef9SDimitry Andric 
1392e8d8bef9SDimitry Andric   // Validate function argument and return types, extracting overloaded types
1393e8d8bef9SDimitry Andric   // along the way.
1394e8d8bef9SDimitry Andric   SmallVector<Type *, 6> OverloadTys;
1395e8d8bef9SDimitry Andric   if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
1396e8d8bef9SDimitry Andric     return nullptr;
1397e8d8bef9SDimitry Andric 
1398e8d8bef9SDimitry Andric   Type *NewTy =
1399e8d8bef9SDimitry Andric       (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
1400e8d8bef9SDimitry Andric   OverloadTys[0] = NewTy;
1401e8d8bef9SDimitry Andric 
140206c3fb27SDimitry Andric   if (!IsLoad) {
140306c3fb27SDimitry Andric     SmallVector<int, 8> EltMask;
140406c3fb27SDimitry Andric     for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
140506c3fb27SDimitry Andric       if (DemandedElts[OrigStoreIdx])
140606c3fb27SDimitry Andric         EltMask.push_back(OrigStoreIdx);
140706c3fb27SDimitry Andric 
140806c3fb27SDimitry Andric     if (NewNumElts == 1)
140906c3fb27SDimitry Andric       Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
141006c3fb27SDimitry Andric     else
141106c3fb27SDimitry Andric       Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
141206c3fb27SDimitry Andric   }
141306c3fb27SDimitry Andric 
1414bdd1243dSDimitry Andric   Function *NewIntrin = Intrinsic::getDeclaration(
1415bdd1243dSDimitry Andric       II.getModule(), II.getIntrinsicID(), OverloadTys);
1416e8d8bef9SDimitry Andric   CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
1417e8d8bef9SDimitry Andric   NewCall->takeName(&II);
1418e8d8bef9SDimitry Andric   NewCall->copyMetadata(II);
1419e8d8bef9SDimitry Andric 
142006c3fb27SDimitry Andric   if (IsLoad) {
1421e8d8bef9SDimitry Andric     if (NewNumElts == 1) {
1422cb14a3feSDimitry Andric       return IC.Builder.CreateInsertElement(PoisonValue::get(IIVTy), NewCall,
142306c3fb27SDimitry Andric                                             DemandedElts.countr_zero());
1424e8d8bef9SDimitry Andric     }
1425e8d8bef9SDimitry Andric 
1426e8d8bef9SDimitry Andric     SmallVector<int, 8> EltMask;
1427e8d8bef9SDimitry Andric     unsigned NewLoadIdx = 0;
1428e8d8bef9SDimitry Andric     for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1429e8d8bef9SDimitry Andric       if (!!DemandedElts[OrigLoadIdx])
1430e8d8bef9SDimitry Andric         EltMask.push_back(NewLoadIdx++);
1431e8d8bef9SDimitry Andric       else
1432e8d8bef9SDimitry Andric         EltMask.push_back(NewNumElts);
1433e8d8bef9SDimitry Andric     }
1434e8d8bef9SDimitry Andric 
143506c3fb27SDimitry Andric     auto *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1436e8d8bef9SDimitry Andric 
1437e8d8bef9SDimitry Andric     return Shuffle;
1438e8d8bef9SDimitry Andric   }
1439e8d8bef9SDimitry Andric 
144006c3fb27SDimitry Andric   return NewCall;
144106c3fb27SDimitry Andric }
144206c3fb27SDimitry Andric 
1443bdd1243dSDimitry Andric std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
1444e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1445e8d8bef9SDimitry Andric     APInt &UndefElts2, APInt &UndefElts3,
1446e8d8bef9SDimitry Andric     std::function<void(Instruction *, unsigned, APInt, APInt &)>
1447e8d8bef9SDimitry Andric         SimplifyAndSetOp) const {
1448e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
1449e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_load:
145006c3fb27SDimitry Andric   case Intrinsic::amdgcn_raw_ptr_buffer_load:
1451e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_load_format:
145206c3fb27SDimitry Andric   case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
1453e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_tbuffer_load:
145406c3fb27SDimitry Andric   case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
1455e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_s_buffer_load:
1456e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_buffer_load:
145706c3fb27SDimitry Andric   case Intrinsic::amdgcn_struct_ptr_buffer_load:
1458e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_buffer_load_format:
145906c3fb27SDimitry Andric   case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
1460e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_tbuffer_load:
146106c3fb27SDimitry Andric   case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
1462e8d8bef9SDimitry Andric     return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
1463e8d8bef9SDimitry Andric   default: {
1464e8d8bef9SDimitry Andric     if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
1465e8d8bef9SDimitry Andric       return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
1466e8d8bef9SDimitry Andric     }
1467e8d8bef9SDimitry Andric     break;
1468e8d8bef9SDimitry Andric   }
1469e8d8bef9SDimitry Andric   }
1470bdd1243dSDimitry Andric   return std::nullopt;
1471e8d8bef9SDimitry Andric }
1472