//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements the AMDGPU-specific parts of InstCombine: it folds and
// simplifies AMDGPU intrinsic calls, using the target's detailed information,
// and is invoked through the TargetTransformInfo instCombineIntrinsic hook,
// while the target-independent InstCombine implementation handles the rest.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;

#define DEBUG_TYPE "AMDGPUtti"

namespace {

struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"

} // end anonymous namespace

// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
//
// A single NaN input is folded to minnum, so we rely on that folding for
// handling NaNs.
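//
// For example, fmed3(1.0, 3.0, 2.0) returns 2.0: the maximum of all three
// inputs equals Src1 (3.0), so the result is maxnum(Src0, Src2) = 2.0.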
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
                           const APFloat &Src2) {
  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);

  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp0 == APFloat::cmpEqual)
    return maxnum(Src1, Src2);

  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp1 == APFloat::cmpEqual)
    return maxnum(Src0, Src2);

  return maxnum(Src0, Src1);
}

// Check if a value can be converted to a 16-bit value without losing
// precision.
// The value is expected to be either a float (IsFloat = true) or an unsigned
// integer (IsFloat = false).
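// For example, (fpext half %x to float) can always be narrowed back to half,
// while a float constant such as 1.0e10 cannot, since it does not fit in half.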
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
  Type *VTy = V.getType();
  if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
    // The value is already 16-bit, so we don't want to convert to 16-bit again!
    return false;
  }
  if (IsFloat) {
    if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
      // We need to check that if we cast the value down to a half, we do not
      // lose precision.
      APFloat FloatValue(ConstFloat->getValueAPF());
      bool LosesInfo = true;
      FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
                         &LosesInfo);
      return !LosesInfo;
    }
  } else {
    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
      // We need to check that if we cast the value down to an i16, we do not
      // lose precision.
      APInt IntValue(ConstInt->getValue());
      return IntValue.getActiveBits() <= 16;
    }
  }

  Value *CastSrc;
  bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
                       : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
  if (IsExt) {
    Type *CastSrcTy = CastSrc->getType();
    if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
      return true;
  }

  return false;
}

// Convert a value to 16-bit.
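// If the value is itself an extension of a 16-bit value, simply return the
// original 16-bit source; otherwise emit a truncating cast.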
static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
  Type *VTy = V.getType();
  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
    return cast<Instruction>(&V)->getOperand(0);
  if (VTy->isIntegerTy())
    return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
  if (VTy->isFloatingPointTy())
    return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));

  llvm_unreachable("Should never be called!");
}

/// Applies Func(II.Args, II.ArgTys), rebuilds the call as a call to the
/// intrinsic NewIntr with the modified arguments and overload types, and
/// replaces the original intrinsic call with it.
static Optional<Instruction *> modifyIntrinsicCall(
    IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
    std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
        Func) {
  SmallVector<Type *, 4> ArgTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
    return None;

  SmallVector<Value *, 8> Args(II.args());

  // Modify arguments and types
  Func(Args, ArgTys);

  Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);

  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
  NewCall->takeName(&II);
  NewCall->copyMetadata(II);
  if (isa<FPMathOperator>(NewCall))
    NewCall->copyFastMathFlags(&II);

  // Erase and replace uses
  if (!II.getType()->isVoidTy())
    IC.replaceInstUsesWith(II, NewCall);
  return IC.eraseInstFromFunction(II);
}

static Optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
  // Optimize _L to _LZ when 'lod' is zero
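  // For example, an image.sample.l call whose lod operand is a constant 0.0
  // becomes the corresponding image.sample.lz call with the lod operand
  // removed.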
  if (const auto *LZMappingInfo =
          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantLod =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->LodIndex);
            });
      }
    }
  }

  // Optimize _mip away when 'lod' is zero
  if (const auto *MIPMappingInfo =
          AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantMip =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
      if (ConstantMip->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->MipIndex);
            });
      }
    }
  }

  // Optimize _bias away when 'bias' is zero
  if (const auto *BiasMappingInfo =
          AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantBias =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
      if (ConstantBias->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
              ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
            });
      }
    }
  }

  // Optimize _offset away when 'offset' is zero
  if (const auto *OffsetMappingInfo =
          AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantOffset =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
      if (ConstantOffset->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(
                OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
            });
      }
    }
  }

  // Try to use A16 or G16
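  // (A16 allows the address/coordinate components to be 16 bit; G16 allows the
  // gradient/derivative components to be 16 bit.)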
  if (!ST->hasA16() && !ST->hasG16())
    return None;

  // Address is interpreted as float if the instruction has a sampler or as
  // unsigned int if there is no sampler.
  bool HasSampler =
      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
  bool FloatCoord = false;
  // If true, only the derivatives (gradients) can be converted to 16 bit,
  // not the coordinates.
  bool OnlyDerivatives = false;

  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
    Value *Coord = II.getOperand(OperandIndex);
    // If the values are not derived from 16-bit values, we cannot optimize.
    if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
      if (OperandIndex < ImageDimIntr->CoordStart ||
          ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
        return None;
      }
      // All gradients can be converted, so convert only them
      OnlyDerivatives = true;
      break;
    }

    assert(OperandIndex == ImageDimIntr->GradientStart ||
           FloatCoord == Coord->getType()->isFloatingPointTy());
    FloatCoord = Coord->getType()->isFloatingPointTy();
  }

  if (!OnlyDerivatives && !ST->hasA16())
    OnlyDerivatives = true; // Only supports G16

  // Check if there is a bias parameter and if it can be converted to f16
  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
    assert(HasSampler &&
           "Only image instructions with a sampler can have a bias");
    if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
      OnlyDerivatives = true;
  }

  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
                                               ImageDimIntr->CoordStart))
    return None;

  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());

  return modifyIntrinsicCall(
      II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
        ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
        if (!OnlyDerivatives) {
          ArgTys[ImageDimIntr->CoordTyArg] = CoordType;

          // Change the bias type
          if (ImageDimIntr->NumBiasArgs != 0)
            ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
        }

        unsigned EndIndex =
            OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
        for (unsigned OperandIndex = ImageDimIntr->GradientStart;
             OperandIndex < EndIndex; OperandIndex++) {
          Args[OperandIndex] =
              convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
        }

        // Convert the bias
        if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
          Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
          Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
        }
      });
}

bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                           InstCombiner &IC) const {
  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
  // infinity, gives +0.0. If we can prove we don't have one of the special
  // cases then we can use a normal multiply instead.
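  // For example, the legacy multiply gives 0.0 * inf = +0.0, while a plain
  // fmul would give NaN, so the rewrite is only valid once such inputs are
  // ruled out.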
  // TODO: Create and use isKnownFiniteNonZero instead of just matching
  // constants here.
  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
      match(Op1, PatternMatch::m_FiniteNonZero())) {
    // One operand is not zero or infinity or NaN.
    return true;
  }
  auto *TLI = &IC.getTargetLibraryInfo();
  if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
      isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
    // Neither operand is infinity or NaN.
    return true;
  }
  return false;
}

Optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::amdgcn_rcp: {
    Value *Src = II.getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    if (II.isStrictFP())
      break;

    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1);
      Val.divide(ArgVal, APFloat::rmNearestTiesToEven);

      // This is more precise than the instruction may give.
      //
      // TODO: The instruction always flushes denormal results (except for f16),
      // should this also?
      return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
    }

    break;
  }
  case Intrinsic::amdgcn_rsq: {
    Value *Src = II.getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    break;
  }
  case Intrinsic::amdgcn_frexp_mant:
  case Intrinsic::amdgcn_frexp_exp: {
    Value *Src = II.getArgOperand(0);
    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      int Exp;
      APFloat Significand =
          frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);

      if (IID == Intrinsic::amdgcn_frexp_mant) {
        return IC.replaceInstUsesWith(
            II, ConstantFP::get(II.getContext(), Significand));
      }

      // Match instruction special case behavior.
      if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
        Exp = 0;

      return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
    }

    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_class: {
    enum {
      S_NAN = 1 << 0,       // Signaling NaN
      Q_NAN = 1 << 1,       // Quiet NaN
      N_INFINITY = 1 << 2,  // Negative infinity
      N_NORMAL = 1 << 3,    // Negative normal
      N_SUBNORMAL = 1 << 4, // Negative subnormal
      N_ZERO = 1 << 5,      // Negative zero
      P_ZERO = 1 << 6,      // Positive zero
      P_SUBNORMAL = 1 << 7, // Positive subnormal
      P_NORMAL = 1 << 8,    // Positive normal
      P_INFINITY = 1 << 9   // Positive infinity
    };

    const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
                              N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL |
                              P_NORMAL | P_INFINITY;

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    if (!CMask) {
      if (isa<UndefValue>(Src0)) {
        return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
      }

      if (isa<UndefValue>(Src1)) {
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), false));
      }
      break;
    }

    uint32_t Mask = CMask->getZExtValue();

    // If all possible classes are tested for, the result is always true,
    // regardless of the value.
    if ((Mask & FullMask) == FullMask) {
      return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
    }

    if ((Mask & FullMask) == 0) {
      return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
    }

    if (Mask == (S_NAN | Q_NAN)) {
      // Equivalent of isnan. Replace with standard fcmp.
      Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
      FCmp->takeName(&II);
      return IC.replaceInstUsesWith(II, FCmp);
    }

    if (Mask == (N_ZERO | P_ZERO)) {
      // Equivalent of == 0.
      Value *FCmp =
          IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));

      FCmp->takeName(&II);
      return IC.replaceInstUsesWith(II, FCmp);
    }

    // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
    if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
        isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
      return IC.replaceOperand(
          II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
    }

    const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
    if (!CVal) {
      if (isa<UndefValue>(Src0)) {
        return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
      }

      // Clamp mask to used bits
      if ((Mask & FullMask) != Mask) {
        CallInst *NewCall = IC.Builder.CreateCall(
            II.getCalledFunction(),
            {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});

        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }

      break;
    }

    const APFloat &Val = CVal->getValueAPF();

    bool Result =
        ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
        ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
        ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
        ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
        ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
        ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
        ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
        ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
        ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
        ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());

    return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
  }
  case Intrinsic::amdgcn_cvt_pkrtz: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        const fltSemantics &HalfSem =
            II.getType()->getScalarType()->getFltSemantics();
        bool LosesInfo;
        APFloat Val0 = C0->getValueAPF();
        APFloat Val1 = C1->getValueAPF();
        Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
        Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);

        Constant *Folded =
            ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
                                 ConstantFP::get(II.getContext(), Val1)});
        return IC.replaceInstUsesWith(II, Folded);
      }
    }

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_ubfe:
  case Intrinsic::amdgcn_sbfe: {
    // Decompose simple cases into standard shifts.
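    // For example, with i32 operands, ubfe(x, 8, 8) becomes
    // lshr(shl(x, 16), 24), and sbfe uses ashr for the right shift instead.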
    Value *Src = II.getArgOperand(0);
    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, Src);
    }

    unsigned Width;
    Type *Ty = II.getType();
    unsigned IntSize = Ty->getIntegerBitWidth();

    ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CWidth) {
      Width = CWidth->getZExtValue();
      if ((Width & (IntSize - 1)) == 0) {
        return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
      }

      // Hardware ignores high bits, so remove those.
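      // For a 32-bit bfe, a width of 35 therefore behaves like a width of 3.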
      if (Width >= IntSize) {
        return IC.replaceOperand(
            II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
      }
    }

    unsigned Offset;
    ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
    if (COffset) {
      Offset = COffset->getZExtValue();
      if (Offset >= IntSize) {
        return IC.replaceOperand(
            II, 1,
            ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
      }
    }

    bool Signed = IID == Intrinsic::amdgcn_sbfe;

    if (!CWidth || !COffset)
      break;

    // The case of Width == 0 is handled above, which makes this transformation
    // safe. If Width == 0, then the ashr and lshr instructions would become
    // poison values since the shift amount would be equal to the bit size.
    assert(Width != 0);

    // TODO: This allows folding to undef when the hardware has specific
    // behavior?
    if (Offset + Width < IntSize) {
      Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
      Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
                                 : IC.Builder.CreateLShr(Shl, IntSize - Width);
      RightShift->takeName(&II);
      return IC.replaceInstUsesWith(II, RightShift);
    }

    Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
                               : IC.Builder.CreateLShr(Src, Offset);

    RightShift->takeName(&II);
    return IC.replaceInstUsesWith(II, RightShift);
  }
  case Intrinsic::amdgcn_exp:
  case Intrinsic::amdgcn_exp_compr: {
    ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
    unsigned EnBits = En->getZExtValue();
    if (EnBits == 0xf)
      break; // All inputs enabled.

    bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
    bool Changed = false;
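    // Replace sources that the enable mask turns off with undef. For plain
    // exports each enable bit covers one source operand; for compressed
    // exports each pair of enable bits covers one packed source.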
    for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
      if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
          (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
        Value *Src = II.getArgOperand(I + 2);
        if (!isa<UndefValue>(Src)) {
          IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
          Changed = true;
        }
      }
    }

    if (Changed) {
      return &II;
    }

    break;
  }
  case Intrinsic::amdgcn_fmed3: {
    // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
    // for the shader.

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    Value *Src2 = II.getArgOperand(2);

    // Checking for NaN before canonicalization provides better fidelity when
    // mapping other operations onto fmed3 since the order of operands is
    // unchanged.
    CallInst *NewCall = nullptr;
    if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
      NewCall = IC.Builder.CreateMinNum(Src1, Src2);
    } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
      NewCall = IC.Builder.CreateMinNum(Src0, Src2);
    } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
      NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
    }

    if (NewCall) {
      NewCall->copyFastMathFlags(&II);
      NewCall->takeName(&II);
      return IC.replaceInstUsesWith(II, NewCall);
    }

    bool Swap = false;
    // Canonicalize constants to RHS operands.
    //
    // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }

    if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
      std::swap(Src1, Src2);
      Swap = true;
    }

    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }

    if (Swap) {
      II.setArgOperand(0, Src0);
      II.setArgOperand(1, Src1);
      II.setArgOperand(2, Src2);
      return &II;
    }

    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
          APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
                                       C2->getValueAPF());
          return IC.replaceInstUsesWith(
              II, ConstantFP::get(IC.Builder.getContext(), Result));
        }
      }
    }

    break;
  }
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp: {
    const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
    // Guard against invalid arguments.
    int64_t CCVal = CC->getZExtValue();
    bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
                       CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
        (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
                        CCVal > CmpInst::LAST_FCMP_PREDICATE)))
      break;

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
      if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
        Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
        if (CCmp->isNullValue()) {
          return IC.replaceInstUsesWith(
              II, ConstantExpr::getSExt(CCmp, II.getType()));
        }

        // The result of V_ICMP/V_FCMP assembly instructions (which this
        // intrinsic exposes) is one bit per thread, masked with the EXEC
        // register (which contains the bitmask of live threads). So a
        // comparison that always returns true is the same as a read of the
        // EXEC register.
        Function *NewF = Intrinsic::getDeclaration(
            II.getModule(), Intrinsic::read_register, II.getType());
        Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
        MDNode *MD = MDNode::get(II.getContext(), MDArgs);
        Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
        CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
        NewCall->addFnAttr(Attribute::Convergent);
        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }

      // Canonicalize constants to RHS.
      CmpInst::Predicate SwapPred =
          CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
      II.setArgOperand(0, Src1);
      II.setArgOperand(1, Src0);
      II.setArgOperand(
          2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
      return &II;
    }

    if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
      break;

    // Canonicalize compare eq with true value to compare != 0
    // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
    //   -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
    // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
    //   -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
    Value *ExtSrc;
    if (CCVal == CmpInst::ICMP_EQ &&
        ((match(Src1, PatternMatch::m_One()) &&
          match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
         (match(Src1, PatternMatch::m_AllOnes()) &&
          match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
        ExtSrc->getType()->isIntegerTy(1)) {
      IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
      IC.replaceOperand(II, 2,
                        ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
      return &II;
    }

    CmpInst::Predicate SrcPred;
    Value *SrcLHS;
    Value *SrcRHS;

    // Fold compare eq/ne with 0 from a compare result as the predicate to the
    // intrinsic. The typical use is a wave vote function in the library, which
    // will be fed from a user code condition compared with 0. Fold in the
    // redundant compare.

    // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
    //   -> llvm.amdgcn.[if]cmp(a, b, pred)
    //
    // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
    //   -> llvm.amdgcn.[if]cmp(a, b, inv pred)
    if (match(Src1, PatternMatch::m_Zero()) &&
        match(Src0, PatternMatch::m_ZExtOrSExt(
                        m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
                              PatternMatch::m_Value(SrcRHS))))) {
      if (CCVal == CmpInst::ICMP_EQ)
        SrcPred = CmpInst::getInversePredicate(SrcPred);

      Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
                                 ? Intrinsic::amdgcn_fcmp
                                 : Intrinsic::amdgcn_icmp;

      Type *Ty = SrcLHS->getType();
      if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
        // Promote to next legal integer type.
        unsigned Width = CmpType->getBitWidth();
        unsigned NewWidth = Width;

        // Don't do anything for i1 comparisons.
        if (Width == 1)
          break;

        if (Width <= 16)
          NewWidth = 16;
        else if (Width <= 32)
          NewWidth = 32;
        else if (Width <= 64)
          NewWidth = 64;
        else if (Width > 64)
          break; // Can't handle this.

        if (Width != NewWidth) {
          IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
          if (CmpInst::isSigned(SrcPred)) {
            SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
            SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
          } else {
            SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
            SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
          }
        }
      } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
        break;

      Function *NewF = Intrinsic::getDeclaration(
          II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
      Value *Args[] = {SrcLHS, SrcRHS,
                       ConstantInt::get(CC->getType(), SrcPred)};
      CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
      NewCall->takeName(&II);
      return IC.replaceInstUsesWith(II, NewCall);
    }

    break;
  }
  case Intrinsic::amdgcn_ballot: {
    if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      if (Src->isZero()) {
        // amdgcn.ballot(i1 0) is zero.
        return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
      }

      if (Src->isOne()) {
        // amdgcn.ballot(i1 1) is exec.
        const char *RegName = "exec";
        if (II.getType()->isIntegerTy(32))
          RegName = "exec_lo";
        else if (!II.getType()->isIntegerTy(64))
          break;

        Function *NewF = Intrinsic::getDeclaration(
            II.getModule(), Intrinsic::read_register, II.getType());
        Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
        MDNode *MD = MDNode::get(II.getContext(), MDArgs);
        Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
        CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
        NewCall->addFnAttr(Attribute::Convergent);
        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }
    }
    break;
  }
  case Intrinsic::amdgcn_wqm_vote: {
    // wqm_vote is identity when the argument is constant.
    if (!isa<Constant>(II.getArgOperand(0)))
      break;

    return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  }
  case Intrinsic::amdgcn_kill: {
    const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
    if (!C || !C->getZExtValue())
      break;

    // amdgcn.kill(i1 1) is a no-op
    return IC.eraseInstFromFunction(II);
  }
  case Intrinsic::amdgcn_update_dpp: {
    Value *Old = II.getArgOperand(0);

    auto *BC = cast<ConstantInt>(II.getArgOperand(5));
    auto *RM = cast<ConstantInt>(II.getArgOperand(3));
    auto *BM = cast<ConstantInt>(II.getArgOperand(4));
    if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
        BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
      break;

    // If bound_ctrl = 1 and row_mask = bank_mask = 0xf, we can omit the old
    // value.
    return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
  }
  case Intrinsic::amdgcn_permlane16:
  case Intrinsic::amdgcn_permlanex16: {
    // Discard vdst_in if it's not going to be read.
    Value *VDstIn = II.getArgOperand(0);
    if (isa<UndefValue>(VDstIn))
      break;

    ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
    ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
    if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
      break;

    return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
  }
  case Intrinsic::amdgcn_readfirstlane:
  case Intrinsic::amdgcn_readlane: {
    // A constant value is trivially uniform.
    if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
      return IC.replaceInstUsesWith(II, C);
    }

    // The rest of these folds may not be safe if exec is not the same between
    // the def and the use.
894e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
895e8d8bef9SDimitry Andric     Instruction *SrcInst = dyn_cast<Instruction>(Src);
896e8d8bef9SDimitry Andric     if (SrcInst && SrcInst->getParent() != II.getParent())
897e8d8bef9SDimitry Andric       break;
898e8d8bef9SDimitry Andric 
899e8d8bef9SDimitry Andric     // readfirstlane (readfirstlane x) -> readfirstlane x
900e8d8bef9SDimitry Andric     // readlane (readfirstlane x), y -> readfirstlane x
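    // Illustrative fold (sketch):
    //   %a = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
    //   %b = call i32 @llvm.amdgcn.readlane(i32 %a, i32 %lane)
    //   ==> uses of %b are replaced with %a, since %a is already uniform.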
901e8d8bef9SDimitry Andric     if (match(Src,
902e8d8bef9SDimitry Andric               PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
903e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Src);
904e8d8bef9SDimitry Andric     }
905e8d8bef9SDimitry Andric 
906e8d8bef9SDimitry Andric     if (IID == Intrinsic::amdgcn_readfirstlane) {
907e8d8bef9SDimitry Andric       // readfirstlane (readlane x, y) -> readlane x, y
908e8d8bef9SDimitry Andric       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
909e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Src);
910e8d8bef9SDimitry Andric       }
911e8d8bef9SDimitry Andric     } else {
912e8d8bef9SDimitry Andric       // readlane (readlane x, y), y -> readlane x, y
913e8d8bef9SDimitry Andric       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
914e8d8bef9SDimitry Andric                          PatternMatch::m_Value(),
915e8d8bef9SDimitry Andric                          PatternMatch::m_Specific(II.getArgOperand(1))))) {
916e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Src);
917e8d8bef9SDimitry Andric       }
918e8d8bef9SDimitry Andric     }
919e8d8bef9SDimitry Andric 
920e8d8bef9SDimitry Andric     break;
921e8d8bef9SDimitry Andric   }
922e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_ldexp: {
923e8d8bef9SDimitry Andric     // FIXME: This doesn't introduce new instructions and belongs in
924e8d8bef9SDimitry Andric     // InstructionSimplify.
925e8d8bef9SDimitry Andric     Type *Ty = II.getType();
926e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
927e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
928e8d8bef9SDimitry Andric 
929e8d8bef9SDimitry Andric     // Folding undef to qnan is safe regardless of the FP mode.
930e8d8bef9SDimitry Andric     if (isa<UndefValue>(Op0)) {
931e8d8bef9SDimitry Andric       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
932e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, QNaN);
933e8d8bef9SDimitry Andric     }
934e8d8bef9SDimitry Andric 
935e8d8bef9SDimitry Andric     const APFloat *C = nullptr;
936e8d8bef9SDimitry Andric     match(Op0, PatternMatch::m_APFloat(C));
937e8d8bef9SDimitry Andric 
938e8d8bef9SDimitry Andric     // FIXME: Should flush denorms depending on FP mode, but that's ignored
939e8d8bef9SDimitry Andric     // everywhere else.
940e8d8bef9SDimitry Andric     //
941e8d8bef9SDimitry Andric     // These cases should be safe, even with strictfp.
942e8d8bef9SDimitry Andric     // ldexp(0.0, x) -> 0.0
943e8d8bef9SDimitry Andric     // ldexp(-0.0, x) -> -0.0
944e8d8bef9SDimitry Andric     // ldexp(inf, x) -> inf
945e8d8bef9SDimitry Andric     // ldexp(-inf, x) -> -inf
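    // Illustrative fold (sketch):
    //   %r = call float @llvm.amdgcn.ldexp.f32(float 0.000000e+00, i32 %e)
    //   ==> uses of %r are replaced with +0.0; scaling a zero or an infinity by
    //   any exponent leaves it unchanged.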
946e8d8bef9SDimitry Andric     if (C && (C->isZero() || C->isInfinity())) {
947e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Op0);
948e8d8bef9SDimitry Andric     }
949e8d8bef9SDimitry Andric 
950e8d8bef9SDimitry Andric     // With strictfp, be more careful: denormals may or may not need to be
951e8d8bef9SDimitry Andric     // flushed depending on the FP mode, and sNaN behavior depends on ieee_mode.
952e8d8bef9SDimitry Andric     if (II.isStrictFP())
953e8d8bef9SDimitry Andric       break;
954e8d8bef9SDimitry Andric 
955e8d8bef9SDimitry Andric     if (C && C->isNaN()) {
956e8d8bef9SDimitry Andric       // FIXME: We just need to make the NaN quiet here, but that operation is
957e8d8bef9SDimitry Andric       // only available on IEEEFloat, not on APFloat.
958e8d8bef9SDimitry Andric       auto *Quieted =
959e8d8bef9SDimitry Andric           ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
960e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Quieted);
961e8d8bef9SDimitry Andric     }
962e8d8bef9SDimitry Andric 
963e8d8bef9SDimitry Andric     // ldexp(x, 0) -> x
964e8d8bef9SDimitry Andric     // ldexp(x, undef) -> x
965e8d8bef9SDimitry Andric     if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
966e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Op0);
967e8d8bef9SDimitry Andric     }
968e8d8bef9SDimitry Andric 
969e8d8bef9SDimitry Andric     break;
970e8d8bef9SDimitry Andric   }
971e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fmul_legacy: {
972e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
973e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
974e8d8bef9SDimitry Andric 
975e8d8bef9SDimitry Andric     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
976e8d8bef9SDimitry Andric     // infinity, gives +0.0.
977e8d8bef9SDimitry Andric     // TODO: Move to InstSimplify?
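    // Illustrative folds (sketch):
    //   call float @llvm.amdgcn.fmul.legacy(float 0.000000e+00, float %x)
    //     ==> +0.0, even if %x is NaN or infinity.
    //   call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
    //     ==> fmul float %a, %b, but only when the legacy zero-times-anything
    //     special cases are provably absent (see canSimplifyLegacyMulToMul).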
978e8d8bef9SDimitry Andric     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
979e8d8bef9SDimitry Andric         match(Op1, PatternMatch::m_AnyZeroFP()))
980e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
981e8d8bef9SDimitry Andric 
982e8d8bef9SDimitry Andric     // If we can prove we don't have one of the special cases, then we can use a
983e8d8bef9SDimitry Andric     // normal fmul instruction instead.
984e8d8bef9SDimitry Andric     if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
985e8d8bef9SDimitry Andric       auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
986e8d8bef9SDimitry Andric       FMul->takeName(&II);
987e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, FMul);
988e8d8bef9SDimitry Andric     }
989e8d8bef9SDimitry Andric     break;
990e8d8bef9SDimitry Andric   }
991e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fma_legacy: {
992e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
993e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
994e8d8bef9SDimitry Andric     Value *Op2 = II.getArgOperand(2);
995e8d8bef9SDimitry Andric 
996e8d8bef9SDimitry Andric     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
997e8d8bef9SDimitry Andric     // infinity, gives +0.0.
998e8d8bef9SDimitry Andric     // TODO: Move to InstSimplify?
999e8d8bef9SDimitry Andric     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1000e8d8bef9SDimitry Andric         match(Op1, PatternMatch::m_AnyZeroFP())) {
1001e8d8bef9SDimitry Andric       // It's tempting to just return Op2 here, but that would give the wrong
1002e8d8bef9SDimitry Andric       // result if Op2 was -0.0.
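    // Illustrative example (sketch): for
    //   call float @llvm.amdgcn.fma.legacy(float 0.0, float %x, float -0.0)
    // the correct result is fadd(+0.0, -0.0) = +0.0, whereas returning Op2
    // directly would yield -0.0.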
1003e8d8bef9SDimitry Andric       auto *Zero = ConstantFP::getNullValue(II.getType());
1004e8d8bef9SDimitry Andric       auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
1005e8d8bef9SDimitry Andric       FAdd->takeName(&II);
1006e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, FAdd);
1007e8d8bef9SDimitry Andric     }
1008e8d8bef9SDimitry Andric 
1009e8d8bef9SDimitry Andric     // If we can prove we don't have one of the special cases, then we can use a
1010e8d8bef9SDimitry Andric     // normal fma instead.
1011e8d8bef9SDimitry Andric     if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1012e8d8bef9SDimitry Andric       II.setCalledOperand(Intrinsic::getDeclaration(
1013e8d8bef9SDimitry Andric           II.getModule(), Intrinsic::fma, II.getType()));
1014e8d8bef9SDimitry Andric       return &II;
1015e8d8bef9SDimitry Andric     }
1016e8d8bef9SDimitry Andric     break;
1017e8d8bef9SDimitry Andric   }
10180eae32dcSDimitry Andric   case Intrinsic::amdgcn_is_shared:
10190eae32dcSDimitry Andric   case Intrinsic::amdgcn_is_private: {
10200eae32dcSDimitry Andric     if (isa<UndefValue>(II.getArgOperand(0)))
10210eae32dcSDimitry Andric       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
10220eae32dcSDimitry Andric 
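    // Fold is_shared/is_private of a null flat pointer to false; the null
    // generic pointer is not treated as pointing into LDS or scratch here.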
10230eae32dcSDimitry Andric     if (isa<ConstantPointerNull>(II.getArgOperand(0)))
10240eae32dcSDimitry Andric       return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
10250eae32dcSDimitry Andric     break;
10260eae32dcSDimitry Andric   }
1027e8d8bef9SDimitry Andric   default: {
1028e8d8bef9SDimitry Andric     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1029e8d8bef9SDimitry Andric             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
1030e8d8bef9SDimitry Andric       return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
1031e8d8bef9SDimitry Andric     }
1032e8d8bef9SDimitry Andric   }
1033e8d8bef9SDimitry Andric   }
1034e8d8bef9SDimitry Andric   return None;
1035e8d8bef9SDimitry Andric }
1036e8d8bef9SDimitry Andric 
1037e8d8bef9SDimitry Andric /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
1038e8d8bef9SDimitry Andric ///
1039e8d8bef9SDimitry Andric /// Note: This only supports non-TFE/LWE image intrinsic calls; TFE/LWE calls
1040e8d8bef9SDimitry Andric ///       have struct returns and are not handled here.
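/// Illustrative effect (sketch): a <4 x float> buffer load of which only the
/// first component is used is rewritten to an overloaded scalar float load,
/// followed by an insertelement of the result into an undef <4 x float>.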
1041e8d8bef9SDimitry Andric static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
1042e8d8bef9SDimitry Andric                                                     IntrinsicInst &II,
1043e8d8bef9SDimitry Andric                                                     APInt DemandedElts,
1044e8d8bef9SDimitry Andric                                                     int DMaskIdx = -1) {
1045e8d8bef9SDimitry Andric 
1046e8d8bef9SDimitry Andric   auto *IIVTy = cast<FixedVectorType>(II.getType());
1047e8d8bef9SDimitry Andric   unsigned VWidth = IIVTy->getNumElements();
1048e8d8bef9SDimitry Andric   if (VWidth == 1)
1049e8d8bef9SDimitry Andric     return nullptr;
1050e8d8bef9SDimitry Andric 
1051e8d8bef9SDimitry Andric   IRBuilderBase::InsertPointGuard Guard(IC.Builder);
1052e8d8bef9SDimitry Andric   IC.Builder.SetInsertPoint(&II);
1053e8d8bef9SDimitry Andric 
1054e8d8bef9SDimitry Andric   // Assume the arguments are unchanged and later override them, if needed.
1055e8d8bef9SDimitry Andric   SmallVector<Value *, 16> Args(II.args());
1056e8d8bef9SDimitry Andric 
1057e8d8bef9SDimitry Andric   if (DMaskIdx < 0) {
1058e8d8bef9SDimitry Andric     // Buffer case.
1059e8d8bef9SDimitry Andric 
1060e8d8bef9SDimitry Andric     const unsigned ActiveBits = DemandedElts.getActiveBits();
1061e8d8bef9SDimitry Andric     const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
1062e8d8bef9SDimitry Andric 
1063e8d8bef9SDimitry Andric     // Start by assuming the whole prefix up to the last demanded element is
1064e8d8bef9SDimitry Andric     // demanded; if there are unused components at the front, clear those bits
1065e8d8bef9SDimitry Andric     // below and fold them into the load offset instead.
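    // Worked example (sketch): for a <4 x float> raw buffer load where only
    // elements 1 and 2 are demanded (mask 0b0110), ActiveBits is 3 and there is
    // one unused component at the front, so DemandedElts ends up as 0b0110, the
    // byte offset is increased by 4, and the load shrinks to <2 x float>.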
1066e8d8bef9SDimitry Andric     DemandedElts = (1 << ActiveBits) - 1;
1067e8d8bef9SDimitry Andric 
1068e8d8bef9SDimitry Andric     if (UnusedComponentsAtFront > 0) {
1069e8d8bef9SDimitry Andric       static const unsigned InvalidOffsetIdx = 0xf;
1070e8d8bef9SDimitry Andric 
1071e8d8bef9SDimitry Andric       unsigned OffsetIdx;
1072e8d8bef9SDimitry Andric       switch (II.getIntrinsicID()) {
1073e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_raw_buffer_load:
1074e8d8bef9SDimitry Andric         OffsetIdx = 1;
1075e8d8bef9SDimitry Andric         break;
1076e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_s_buffer_load:
1077e8d8bef9SDimitry Andric         // If the resulting type is vec3, there is no point in trimming the
1078e8d8bef9SDimitry Andric         // load with an updated offset, as the vec3 would most likely be widened
1079e8d8bef9SDimitry Andric         // to vec4 anyway during lowering.
1080e8d8bef9SDimitry Andric         if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1081e8d8bef9SDimitry Andric           OffsetIdx = InvalidOffsetIdx;
1082e8d8bef9SDimitry Andric         else
1083e8d8bef9SDimitry Andric           OffsetIdx = 1;
1084e8d8bef9SDimitry Andric         break;
1085e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_struct_buffer_load:
1086e8d8bef9SDimitry Andric         OffsetIdx = 2;
1087e8d8bef9SDimitry Andric         break;
1088e8d8bef9SDimitry Andric       default:
1089e8d8bef9SDimitry Andric         // TODO: handle tbuffer* intrinsics.
1090e8d8bef9SDimitry Andric         OffsetIdx = InvalidOffsetIdx;
1091e8d8bef9SDimitry Andric         break;
1092e8d8bef9SDimitry Andric       }
1093e8d8bef9SDimitry Andric 
1094e8d8bef9SDimitry Andric       if (OffsetIdx != InvalidOffsetIdx) {
1095e8d8bef9SDimitry Andric         // Clear demanded bits and update the offset.
1096e8d8bef9SDimitry Andric         DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1097e8d8bef9SDimitry Andric         auto *Offset = II.getArgOperand(OffsetIdx);
1098e8d8bef9SDimitry Andric         unsigned SingleComponentSizeInBits =
1099e8d8bef9SDimitry Andric             IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
1100e8d8bef9SDimitry Andric         unsigned OffsetAdd =
1101e8d8bef9SDimitry Andric             UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1102e8d8bef9SDimitry Andric         auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1103e8d8bef9SDimitry Andric         Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
1104e8d8bef9SDimitry Andric       }
1105e8d8bef9SDimitry Andric     }
1106e8d8bef9SDimitry Andric   } else {
1107e8d8bef9SDimitry Andric     // Image case.
1108e8d8bef9SDimitry Andric 
1109e8d8bef9SDimitry Andric     ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
1110e8d8bef9SDimitry Andric     unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1111e8d8bef9SDimitry Andric 
1112e8d8bef9SDimitry Andric     // Mask off values that are undefined because the dmask doesn't cover them
1113e8d8bef9SDimitry Andric     DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
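    // Worked example (sketch): a load with dmask = 0b1011 returns three
    // components; if only result elements 0 and 2 are demanded, the loop below
    // produces NewDMaskVal = 0b1001 and the load shrinks to two components.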
1114e8d8bef9SDimitry Andric 
1115e8d8bef9SDimitry Andric     unsigned NewDMaskVal = 0;
1116e8d8bef9SDimitry Andric     unsigned OrigLoadIdx = 0;
1117e8d8bef9SDimitry Andric     for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1118e8d8bef9SDimitry Andric       const unsigned Bit = 1 << SrcIdx;
1119e8d8bef9SDimitry Andric       if (!!(DMaskVal & Bit)) {
1120e8d8bef9SDimitry Andric         if (!!DemandedElts[OrigLoadIdx])
1121e8d8bef9SDimitry Andric           NewDMaskVal |= Bit;
1122e8d8bef9SDimitry Andric         OrigLoadIdx++;
1123e8d8bef9SDimitry Andric       }
1124e8d8bef9SDimitry Andric     }
1125e8d8bef9SDimitry Andric 
1126e8d8bef9SDimitry Andric     if (DMaskVal != NewDMaskVal)
1127e8d8bef9SDimitry Andric       Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1128e8d8bef9SDimitry Andric   }
1129e8d8bef9SDimitry Andric 
1130e8d8bef9SDimitry Andric   unsigned NewNumElts = DemandedElts.countPopulation();
1131e8d8bef9SDimitry Andric   if (!NewNumElts)
1132e8d8bef9SDimitry Andric     return UndefValue::get(II.getType());
1133e8d8bef9SDimitry Andric 
1134e8d8bef9SDimitry Andric   if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1135e8d8bef9SDimitry Andric     if (DMaskIdx >= 0)
1136e8d8bef9SDimitry Andric       II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1137e8d8bef9SDimitry Andric     return nullptr;
1138e8d8bef9SDimitry Andric   }
1139e8d8bef9SDimitry Andric 
1140e8d8bef9SDimitry Andric   // Validate function argument and return types, extracting overloaded types
1141e8d8bef9SDimitry Andric   // along the way.
1142e8d8bef9SDimitry Andric   SmallVector<Type *, 6> OverloadTys;
1143e8d8bef9SDimitry Andric   if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
1144e8d8bef9SDimitry Andric     return nullptr;
1145e8d8bef9SDimitry Andric 
1146e8d8bef9SDimitry Andric   Module *M = II.getParent()->getParent()->getParent();
1147e8d8bef9SDimitry Andric   Type *EltTy = IIVTy->getElementType();
1148e8d8bef9SDimitry Andric   Type *NewTy =
1149e8d8bef9SDimitry Andric       (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
1150e8d8bef9SDimitry Andric 
1151e8d8bef9SDimitry Andric   OverloadTys[0] = NewTy;
1152e8d8bef9SDimitry Andric   Function *NewIntrin =
1153e8d8bef9SDimitry Andric       Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
1154e8d8bef9SDimitry Andric 
1155e8d8bef9SDimitry Andric   CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
1156e8d8bef9SDimitry Andric   NewCall->takeName(&II);
1157e8d8bef9SDimitry Andric   NewCall->copyMetadata(II);
1158e8d8bef9SDimitry Andric 
1159e8d8bef9SDimitry Andric   if (NewNumElts == 1) {
1160e8d8bef9SDimitry Andric     return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
1161e8d8bef9SDimitry Andric                                           NewCall,
1162e8d8bef9SDimitry Andric                                           DemandedElts.countTrailingZeros());
1163e8d8bef9SDimitry Andric   }
1164e8d8bef9SDimitry Andric 
1165e8d8bef9SDimitry Andric   SmallVector<int, 8> EltMask;
1166e8d8bef9SDimitry Andric   unsigned NewLoadIdx = 0;
1167e8d8bef9SDimitry Andric   for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1168e8d8bef9SDimitry Andric     if (!!DemandedElts[OrigLoadIdx])
1169e8d8bef9SDimitry Andric       EltMask.push_back(NewLoadIdx++);
1170e8d8bef9SDimitry Andric     else
1171e8d8bef9SDimitry Andric       EltMask.push_back(NewNumElts);
1172e8d8bef9SDimitry Andric   }
1173e8d8bef9SDimitry Andric 
1174e8d8bef9SDimitry Andric   Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1175e8d8bef9SDimitry Andric 
1176e8d8bef9SDimitry Andric   return Shuffle;
1177e8d8bef9SDimitry Andric }
1178e8d8bef9SDimitry Andric 
1179e8d8bef9SDimitry Andric Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
1180e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1181e8d8bef9SDimitry Andric     APInt &UndefElts2, APInt &UndefElts3,
1182e8d8bef9SDimitry Andric     std::function<void(Instruction *, unsigned, APInt, APInt &)>
1183e8d8bef9SDimitry Andric         SimplifyAndSetOp) const {
1184e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
1185e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_buffer_load:
1186e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_buffer_load_format:
1187e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_load:
1188e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_load_format:
1189e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_tbuffer_load:
1190e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_s_buffer_load:
1191e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_buffer_load:
1192e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_buffer_load_format:
1193e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_tbuffer_load:
1194e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_tbuffer_load:
1195e8d8bef9SDimitry Andric     return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
1196e8d8bef9SDimitry Andric   default: {
1197e8d8bef9SDimitry Andric     if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
1198e8d8bef9SDimitry Andric       return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
1199e8d8bef9SDimitry Andric     }
1200e8d8bef9SDimitry Andric     break;
1201e8d8bef9SDimitry Andric   }
1202e8d8bef9SDimitry Andric   }
1203e8d8bef9SDimitry Andric   return None;
1204e8d8bef9SDimitry Andric }
1205