xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
2*e8d8bef9SDimitry Andric //
3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*e8d8bef9SDimitry Andric //
7*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8*e8d8bef9SDimitry Andric //
// \file
// This file implements AMDGPU-target-specific combines for target intrinsics
// within the InstCombine pass: GCNTTIImpl::instCombineIntrinsic and its
// helpers use detailed subtarget information to constant-fold and simplify
// AMDGPU intrinsic calls that the target-independent combines cannot handle.
14*e8d8bef9SDimitry Andric //
15*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
16*e8d8bef9SDimitry Andric 
17*e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h"
18*e8d8bef9SDimitry Andric #include "AMDGPUTargetTransformInfo.h"
19*e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
20*e8d8bef9SDimitry Andric #include "R600Subtarget.h"
21*e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
22*e8d8bef9SDimitry Andric #include "llvm/Transforms/InstCombine/InstCombiner.h"
23*e8d8bef9SDimitry Andric 
24*e8d8bef9SDimitry Andric using namespace llvm;
25*e8d8bef9SDimitry Andric 
26*e8d8bef9SDimitry Andric #define DEBUG_TYPE "AMDGPUtti"
27*e8d8bef9SDimitry Andric 
28*e8d8bef9SDimitry Andric namespace {
29*e8d8bef9SDimitry Andric 
30*e8d8bef9SDimitry Andric struct AMDGPUImageDMaskIntrinsic {
31*e8d8bef9SDimitry Andric   unsigned Intr;
32*e8d8bef9SDimitry Andric };
33*e8d8bef9SDimitry Andric 
34*e8d8bef9SDimitry Andric #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
35*e8d8bef9SDimitry Andric #include "InstCombineTables.inc"
36*e8d8bef9SDimitry Andric 
37*e8d8bef9SDimitry Andric } // end anonymous namespace
38*e8d8bef9SDimitry Andric 
39*e8d8bef9SDimitry Andric // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
40*e8d8bef9SDimitry Andric //
41*e8d8bef9SDimitry Andric // A single NaN input is folded to minnum, so we rely on that folding for
42*e8d8bef9SDimitry Andric // handling NaNs.
43*e8d8bef9SDimitry Andric static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
44*e8d8bef9SDimitry Andric                            const APFloat &Src2) {
45*e8d8bef9SDimitry Andric   APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
46*e8d8bef9SDimitry Andric 
47*e8d8bef9SDimitry Andric   APFloat::cmpResult Cmp0 = Max3.compare(Src0);
48*e8d8bef9SDimitry Andric   assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
49*e8d8bef9SDimitry Andric   if (Cmp0 == APFloat::cmpEqual)
50*e8d8bef9SDimitry Andric     return maxnum(Src1, Src2);
51*e8d8bef9SDimitry Andric 
52*e8d8bef9SDimitry Andric   APFloat::cmpResult Cmp1 = Max3.compare(Src1);
53*e8d8bef9SDimitry Andric   assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
54*e8d8bef9SDimitry Andric   if (Cmp1 == APFloat::cmpEqual)
55*e8d8bef9SDimitry Andric     return maxnum(Src0, Src2);
56*e8d8bef9SDimitry Andric 
57*e8d8bef9SDimitry Andric   return maxnum(Src0, Src1);
58*e8d8bef9SDimitry Andric }
59*e8d8bef9SDimitry Andric 
60*e8d8bef9SDimitry Andric // Check if a value can be converted to a 16-bit value without losing
61*e8d8bef9SDimitry Andric // precision.
62*e8d8bef9SDimitry Andric static bool canSafelyConvertTo16Bit(Value &V) {
63*e8d8bef9SDimitry Andric   Type *VTy = V.getType();
64*e8d8bef9SDimitry Andric   if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
65*e8d8bef9SDimitry Andric     // The value is already 16-bit, so we don't want to convert to 16-bit again!
66*e8d8bef9SDimitry Andric     return false;
67*e8d8bef9SDimitry Andric   }
68*e8d8bef9SDimitry Andric   if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
69*e8d8bef9SDimitry Andric     // We need to check that if we cast the index down to a half, we do not lose
70*e8d8bef9SDimitry Andric     // precision.
71*e8d8bef9SDimitry Andric     APFloat FloatValue(ConstFloat->getValueAPF());
72*e8d8bef9SDimitry Andric     bool LosesInfo = true;
73*e8d8bef9SDimitry Andric     FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
74*e8d8bef9SDimitry Andric     return !LosesInfo;
75*e8d8bef9SDimitry Andric   }
76*e8d8bef9SDimitry Andric   Value *CastSrc;
77*e8d8bef9SDimitry Andric   if (match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) ||
78*e8d8bef9SDimitry Andric       match(&V, m_SExt(PatternMatch::m_Value(CastSrc))) ||
79*e8d8bef9SDimitry Andric       match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)))) {
80*e8d8bef9SDimitry Andric     Type *CastSrcTy = CastSrc->getType();
81*e8d8bef9SDimitry Andric     if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
82*e8d8bef9SDimitry Andric       return true;
83*e8d8bef9SDimitry Andric   }
84*e8d8bef9SDimitry Andric 
85*e8d8bef9SDimitry Andric   return false;
86*e8d8bef9SDimitry Andric }
87*e8d8bef9SDimitry Andric 
88*e8d8bef9SDimitry Andric // Convert a value to 16-bit.
89*e8d8bef9SDimitry Andric static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
90*e8d8bef9SDimitry Andric   Type *VTy = V.getType();
91*e8d8bef9SDimitry Andric   if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
92*e8d8bef9SDimitry Andric     return cast<Instruction>(&V)->getOperand(0);
93*e8d8bef9SDimitry Andric   if (VTy->isIntegerTy())
94*e8d8bef9SDimitry Andric     return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
95*e8d8bef9SDimitry Andric   if (VTy->isFloatingPointTy())
96*e8d8bef9SDimitry Andric     return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
97*e8d8bef9SDimitry Andric 
98*e8d8bef9SDimitry Andric   llvm_unreachable("Should never be called!");
99*e8d8bef9SDimitry Andric }
100*e8d8bef9SDimitry Andric 
// Try to shrink the coordinate and/or gradient operands of an image
// intrinsic to 16 bits (the A16/G16 features), rebuilding the call with a
// narrower overload when every affected operand is provably convertible.
// Returns the replacement instruction, or None if no change is possible.
static Optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
  // Without A16 (16-bit addresses) or G16 (16-bit gradients) there is no
  // narrower encoding to target.
  if (!ST->hasA16() && !ST->hasG16())
    return None;

  bool FloatCoord = false;
  // true means derivatives can be converted to 16 bit, coordinates not
  bool OnlyDerivatives = false;

  // Scan all VAddr operands (gradients first, then coordinates; operand
  // ranges come from ImageDimIntr — assumed GradientStart <= CoordStart <=
  // VAddrEnd, TODO confirm against the TableGen definition).
  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
    Value *Coord = II.getOperand(OperandIndex);
    // If the values are not derived from 16-bit values, we cannot optimize.
    if (!canSafelyConvertTo16Bit(*Coord)) {
      // A non-convertible gradient (index before CoordStart), or an intrinsic
      // with no separate gradient range, kills the whole transform.
      if (OperandIndex < ImageDimIntr->CoordStart ||
          ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
        return None;
      }
      // All gradients can be converted, so convert only them
      OnlyDerivatives = true;
      break;
    }

    // All convertible coordinates must agree on float vs. integer, since a
    // single CoordType is chosen below; gradients set the initial kind.
    assert(OperandIndex == ImageDimIntr->GradientStart ||
           FloatCoord == Coord->getType()->isFloatingPointTy());
    FloatCoord = Coord->getType()->isFloatingPointTy();
  }

  // Reconcile what we can convert with what the subtarget supports.
  if (OnlyDerivatives) {
    if (!ST->hasG16())
      return None;
  } else {
    if (!ST->hasA16())
      OnlyDerivatives = true; // Only supports G16
  }

  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());

  // Rebuild the overloaded signature with the narrowed gradient (and
  // optionally coordinate) types.
  SmallVector<Type *, 4> ArgTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
    return None;

  ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
  if (!OnlyDerivatives)
    ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
  Function *I =
      Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);

  SmallVector<Value *, 8> Args(II.arg_operands());

  // Convert the chosen operand range in place; everything else is reused.
  unsigned EndIndex =
      OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < EndIndex; OperandIndex++) {
    Args[OperandIndex] =
        convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
  }

  // Preserve name, metadata and fast-math flags on the replacement call.
  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
  NewCall->takeName(&II);
  NewCall->copyMetadata(II);
  if (isa<FPMathOperator>(NewCall))
    NewCall->copyFastMathFlags(&II);
  return IC.replaceInstUsesWith(II, NewCall);
}
169*e8d8bef9SDimitry Andric 
170*e8d8bef9SDimitry Andric bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
171*e8d8bef9SDimitry Andric                                            InstCombiner &IC) const {
172*e8d8bef9SDimitry Andric   // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
173*e8d8bef9SDimitry Andric   // infinity, gives +0.0. If we can prove we don't have one of the special
174*e8d8bef9SDimitry Andric   // cases then we can use a normal multiply instead.
175*e8d8bef9SDimitry Andric   // TODO: Create and use isKnownFiniteNonZero instead of just matching
176*e8d8bef9SDimitry Andric   // constants here.
177*e8d8bef9SDimitry Andric   if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
178*e8d8bef9SDimitry Andric       match(Op1, PatternMatch::m_FiniteNonZero())) {
179*e8d8bef9SDimitry Andric     // One operand is not zero or infinity or NaN.
180*e8d8bef9SDimitry Andric     return true;
181*e8d8bef9SDimitry Andric   }
182*e8d8bef9SDimitry Andric   auto *TLI = &IC.getTargetLibraryInfo();
183*e8d8bef9SDimitry Andric   if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
184*e8d8bef9SDimitry Andric       isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
185*e8d8bef9SDimitry Andric     // Neither operand is infinity or NaN.
186*e8d8bef9SDimitry Andric     return true;
187*e8d8bef9SDimitry Andric   }
188*e8d8bef9SDimitry Andric   return false;
189*e8d8bef9SDimitry Andric }
190*e8d8bef9SDimitry Andric 
191*e8d8bef9SDimitry Andric Optional<Instruction *>
192*e8d8bef9SDimitry Andric GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
193*e8d8bef9SDimitry Andric   Intrinsic::ID IID = II.getIntrinsicID();
194*e8d8bef9SDimitry Andric   switch (IID) {
195*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_rcp: {
196*e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
197*e8d8bef9SDimitry Andric 
198*e8d8bef9SDimitry Andric     // TODO: Move to ConstantFolding/InstSimplify?
199*e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src)) {
200*e8d8bef9SDimitry Andric       Type *Ty = II.getType();
201*e8d8bef9SDimitry Andric       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
202*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, QNaN);
203*e8d8bef9SDimitry Andric     }
204*e8d8bef9SDimitry Andric 
205*e8d8bef9SDimitry Andric     if (II.isStrictFP())
206*e8d8bef9SDimitry Andric       break;
207*e8d8bef9SDimitry Andric 
208*e8d8bef9SDimitry Andric     if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
209*e8d8bef9SDimitry Andric       const APFloat &ArgVal = C->getValueAPF();
210*e8d8bef9SDimitry Andric       APFloat Val(ArgVal.getSemantics(), 1);
211*e8d8bef9SDimitry Andric       Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
212*e8d8bef9SDimitry Andric 
213*e8d8bef9SDimitry Andric       // This is more precise than the instruction may give.
214*e8d8bef9SDimitry Andric       //
215*e8d8bef9SDimitry Andric       // TODO: The instruction always flushes denormal results (except for f16),
216*e8d8bef9SDimitry Andric       // should this also?
217*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
218*e8d8bef9SDimitry Andric     }
219*e8d8bef9SDimitry Andric 
220*e8d8bef9SDimitry Andric     break;
221*e8d8bef9SDimitry Andric   }
222*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_rsq: {
223*e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
224*e8d8bef9SDimitry Andric 
225*e8d8bef9SDimitry Andric     // TODO: Move to ConstantFolding/InstSimplify?
226*e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src)) {
227*e8d8bef9SDimitry Andric       Type *Ty = II.getType();
228*e8d8bef9SDimitry Andric       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
229*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, QNaN);
230*e8d8bef9SDimitry Andric     }
231*e8d8bef9SDimitry Andric 
232*e8d8bef9SDimitry Andric     break;
233*e8d8bef9SDimitry Andric   }
234*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_frexp_mant:
235*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_frexp_exp: {
236*e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
237*e8d8bef9SDimitry Andric     if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
238*e8d8bef9SDimitry Andric       int Exp;
239*e8d8bef9SDimitry Andric       APFloat Significand =
240*e8d8bef9SDimitry Andric           frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
241*e8d8bef9SDimitry Andric 
242*e8d8bef9SDimitry Andric       if (IID == Intrinsic::amdgcn_frexp_mant) {
243*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(
244*e8d8bef9SDimitry Andric             II, ConstantFP::get(II.getContext(), Significand));
245*e8d8bef9SDimitry Andric       }
246*e8d8bef9SDimitry Andric 
247*e8d8bef9SDimitry Andric       // Match instruction special case behavior.
248*e8d8bef9SDimitry Andric       if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
249*e8d8bef9SDimitry Andric         Exp = 0;
250*e8d8bef9SDimitry Andric 
251*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
252*e8d8bef9SDimitry Andric     }
253*e8d8bef9SDimitry Andric 
254*e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src)) {
255*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
256*e8d8bef9SDimitry Andric     }
257*e8d8bef9SDimitry Andric 
258*e8d8bef9SDimitry Andric     break;
259*e8d8bef9SDimitry Andric   }
260*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_class: {
261*e8d8bef9SDimitry Andric     enum {
262*e8d8bef9SDimitry Andric       S_NAN = 1 << 0,       // Signaling NaN
263*e8d8bef9SDimitry Andric       Q_NAN = 1 << 1,       // Quiet NaN
264*e8d8bef9SDimitry Andric       N_INFINITY = 1 << 2,  // Negative infinity
265*e8d8bef9SDimitry Andric       N_NORMAL = 1 << 3,    // Negative normal
266*e8d8bef9SDimitry Andric       N_SUBNORMAL = 1 << 4, // Negative subnormal
267*e8d8bef9SDimitry Andric       N_ZERO = 1 << 5,      // Negative zero
268*e8d8bef9SDimitry Andric       P_ZERO = 1 << 6,      // Positive zero
269*e8d8bef9SDimitry Andric       P_SUBNORMAL = 1 << 7, // Positive subnormal
270*e8d8bef9SDimitry Andric       P_NORMAL = 1 << 8,    // Positive normal
271*e8d8bef9SDimitry Andric       P_INFINITY = 1 << 9   // Positive infinity
272*e8d8bef9SDimitry Andric     };
273*e8d8bef9SDimitry Andric 
274*e8d8bef9SDimitry Andric     const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
275*e8d8bef9SDimitry Andric                               N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL |
276*e8d8bef9SDimitry Andric                               P_NORMAL | P_INFINITY;
277*e8d8bef9SDimitry Andric 
278*e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
279*e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
280*e8d8bef9SDimitry Andric     const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
281*e8d8bef9SDimitry Andric     if (!CMask) {
282*e8d8bef9SDimitry Andric       if (isa<UndefValue>(Src0)) {
283*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
284*e8d8bef9SDimitry Andric       }
285*e8d8bef9SDimitry Andric 
286*e8d8bef9SDimitry Andric       if (isa<UndefValue>(Src1)) {
287*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
288*e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), false));
289*e8d8bef9SDimitry Andric       }
290*e8d8bef9SDimitry Andric       break;
291*e8d8bef9SDimitry Andric     }
292*e8d8bef9SDimitry Andric 
293*e8d8bef9SDimitry Andric     uint32_t Mask = CMask->getZExtValue();
294*e8d8bef9SDimitry Andric 
295*e8d8bef9SDimitry Andric     // If all tests are made, it doesn't matter what the value is.
296*e8d8bef9SDimitry Andric     if ((Mask & FullMask) == FullMask) {
297*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
298*e8d8bef9SDimitry Andric     }
299*e8d8bef9SDimitry Andric 
300*e8d8bef9SDimitry Andric     if ((Mask & FullMask) == 0) {
301*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
302*e8d8bef9SDimitry Andric     }
303*e8d8bef9SDimitry Andric 
304*e8d8bef9SDimitry Andric     if (Mask == (S_NAN | Q_NAN)) {
305*e8d8bef9SDimitry Andric       // Equivalent of isnan. Replace with standard fcmp.
306*e8d8bef9SDimitry Andric       Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
307*e8d8bef9SDimitry Andric       FCmp->takeName(&II);
308*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, FCmp);
309*e8d8bef9SDimitry Andric     }
310*e8d8bef9SDimitry Andric 
311*e8d8bef9SDimitry Andric     if (Mask == (N_ZERO | P_ZERO)) {
312*e8d8bef9SDimitry Andric       // Equivalent of == 0.
313*e8d8bef9SDimitry Andric       Value *FCmp =
314*e8d8bef9SDimitry Andric           IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
315*e8d8bef9SDimitry Andric 
316*e8d8bef9SDimitry Andric       FCmp->takeName(&II);
317*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, FCmp);
318*e8d8bef9SDimitry Andric     }
319*e8d8bef9SDimitry Andric 
320*e8d8bef9SDimitry Andric     // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
321*e8d8bef9SDimitry Andric     if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
322*e8d8bef9SDimitry Andric         isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
323*e8d8bef9SDimitry Andric       return IC.replaceOperand(
324*e8d8bef9SDimitry Andric           II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
325*e8d8bef9SDimitry Andric     }
326*e8d8bef9SDimitry Andric 
327*e8d8bef9SDimitry Andric     const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
328*e8d8bef9SDimitry Andric     if (!CVal) {
329*e8d8bef9SDimitry Andric       if (isa<UndefValue>(Src0)) {
330*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
331*e8d8bef9SDimitry Andric       }
332*e8d8bef9SDimitry Andric 
333*e8d8bef9SDimitry Andric       // Clamp mask to used bits
334*e8d8bef9SDimitry Andric       if ((Mask & FullMask) != Mask) {
335*e8d8bef9SDimitry Andric         CallInst *NewCall = IC.Builder.CreateCall(
336*e8d8bef9SDimitry Andric             II.getCalledFunction(),
337*e8d8bef9SDimitry Andric             {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});
338*e8d8bef9SDimitry Andric 
339*e8d8bef9SDimitry Andric         NewCall->takeName(&II);
340*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, NewCall);
341*e8d8bef9SDimitry Andric       }
342*e8d8bef9SDimitry Andric 
343*e8d8bef9SDimitry Andric       break;
344*e8d8bef9SDimitry Andric     }
345*e8d8bef9SDimitry Andric 
346*e8d8bef9SDimitry Andric     const APFloat &Val = CVal->getValueAPF();
347*e8d8bef9SDimitry Andric 
348*e8d8bef9SDimitry Andric     bool Result =
349*e8d8bef9SDimitry Andric         ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
350*e8d8bef9SDimitry Andric         ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
351*e8d8bef9SDimitry Andric         ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
352*e8d8bef9SDimitry Andric         ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
353*e8d8bef9SDimitry Andric         ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
354*e8d8bef9SDimitry Andric         ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
355*e8d8bef9SDimitry Andric         ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
356*e8d8bef9SDimitry Andric         ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
357*e8d8bef9SDimitry Andric         ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
358*e8d8bef9SDimitry Andric         ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
359*e8d8bef9SDimitry Andric 
360*e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
361*e8d8bef9SDimitry Andric   }
362*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pkrtz: {
363*e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
364*e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
365*e8d8bef9SDimitry Andric     if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
366*e8d8bef9SDimitry Andric       if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
367*e8d8bef9SDimitry Andric         const fltSemantics &HalfSem =
368*e8d8bef9SDimitry Andric             II.getType()->getScalarType()->getFltSemantics();
369*e8d8bef9SDimitry Andric         bool LosesInfo;
370*e8d8bef9SDimitry Andric         APFloat Val0 = C0->getValueAPF();
371*e8d8bef9SDimitry Andric         APFloat Val1 = C1->getValueAPF();
372*e8d8bef9SDimitry Andric         Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
373*e8d8bef9SDimitry Andric         Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
374*e8d8bef9SDimitry Andric 
375*e8d8bef9SDimitry Andric         Constant *Folded =
376*e8d8bef9SDimitry Andric             ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
377*e8d8bef9SDimitry Andric                                  ConstantFP::get(II.getContext(), Val1)});
378*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Folded);
379*e8d8bef9SDimitry Andric       }
380*e8d8bef9SDimitry Andric     }
381*e8d8bef9SDimitry Andric 
382*e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
383*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
384*e8d8bef9SDimitry Andric     }
385*e8d8bef9SDimitry Andric 
386*e8d8bef9SDimitry Andric     break;
387*e8d8bef9SDimitry Andric   }
388*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pknorm_i16:
389*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pknorm_u16:
390*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pk_i16:
391*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_cvt_pk_u16: {
392*e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
393*e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
394*e8d8bef9SDimitry Andric 
395*e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
396*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
397*e8d8bef9SDimitry Andric     }
398*e8d8bef9SDimitry Andric 
399*e8d8bef9SDimitry Andric     break;
400*e8d8bef9SDimitry Andric   }
401*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_ubfe:
402*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_sbfe: {
403*e8d8bef9SDimitry Andric     // Decompose simple cases into standard shifts.
404*e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
405*e8d8bef9SDimitry Andric     if (isa<UndefValue>(Src)) {
406*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Src);
407*e8d8bef9SDimitry Andric     }
408*e8d8bef9SDimitry Andric 
409*e8d8bef9SDimitry Andric     unsigned Width;
410*e8d8bef9SDimitry Andric     Type *Ty = II.getType();
411*e8d8bef9SDimitry Andric     unsigned IntSize = Ty->getIntegerBitWidth();
412*e8d8bef9SDimitry Andric 
413*e8d8bef9SDimitry Andric     ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
414*e8d8bef9SDimitry Andric     if (CWidth) {
415*e8d8bef9SDimitry Andric       Width = CWidth->getZExtValue();
416*e8d8bef9SDimitry Andric       if ((Width & (IntSize - 1)) == 0) {
417*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
418*e8d8bef9SDimitry Andric       }
419*e8d8bef9SDimitry Andric 
420*e8d8bef9SDimitry Andric       // Hardware ignores high bits, so remove those.
421*e8d8bef9SDimitry Andric       if (Width >= IntSize) {
422*e8d8bef9SDimitry Andric         return IC.replaceOperand(
423*e8d8bef9SDimitry Andric             II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
424*e8d8bef9SDimitry Andric       }
425*e8d8bef9SDimitry Andric     }
426*e8d8bef9SDimitry Andric 
427*e8d8bef9SDimitry Andric     unsigned Offset;
428*e8d8bef9SDimitry Andric     ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
429*e8d8bef9SDimitry Andric     if (COffset) {
430*e8d8bef9SDimitry Andric       Offset = COffset->getZExtValue();
431*e8d8bef9SDimitry Andric       if (Offset >= IntSize) {
432*e8d8bef9SDimitry Andric         return IC.replaceOperand(
433*e8d8bef9SDimitry Andric             II, 1,
434*e8d8bef9SDimitry Andric             ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
435*e8d8bef9SDimitry Andric       }
436*e8d8bef9SDimitry Andric     }
437*e8d8bef9SDimitry Andric 
438*e8d8bef9SDimitry Andric     bool Signed = IID == Intrinsic::amdgcn_sbfe;
439*e8d8bef9SDimitry Andric 
440*e8d8bef9SDimitry Andric     if (!CWidth || !COffset)
441*e8d8bef9SDimitry Andric       break;
442*e8d8bef9SDimitry Andric 
443*e8d8bef9SDimitry Andric     // The case of Width == 0 is handled above, which makes this tranformation
444*e8d8bef9SDimitry Andric     // safe.  If Width == 0, then the ashr and lshr instructions become poison
445*e8d8bef9SDimitry Andric     // value since the shift amount would be equal to the bit size.
446*e8d8bef9SDimitry Andric     assert(Width != 0);
447*e8d8bef9SDimitry Andric 
448*e8d8bef9SDimitry Andric     // TODO: This allows folding to undef when the hardware has specific
449*e8d8bef9SDimitry Andric     // behavior?
450*e8d8bef9SDimitry Andric     if (Offset + Width < IntSize) {
451*e8d8bef9SDimitry Andric       Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
452*e8d8bef9SDimitry Andric       Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
453*e8d8bef9SDimitry Andric                                  : IC.Builder.CreateLShr(Shl, IntSize - Width);
454*e8d8bef9SDimitry Andric       RightShift->takeName(&II);
455*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, RightShift);
456*e8d8bef9SDimitry Andric     }
457*e8d8bef9SDimitry Andric 
458*e8d8bef9SDimitry Andric     Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
459*e8d8bef9SDimitry Andric                                : IC.Builder.CreateLShr(Src, Offset);
460*e8d8bef9SDimitry Andric 
461*e8d8bef9SDimitry Andric     RightShift->takeName(&II);
462*e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, RightShift);
463*e8d8bef9SDimitry Andric   }
464*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_exp:
465*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_exp_compr: {
466*e8d8bef9SDimitry Andric     ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
467*e8d8bef9SDimitry Andric     unsigned EnBits = En->getZExtValue();
468*e8d8bef9SDimitry Andric     if (EnBits == 0xf)
469*e8d8bef9SDimitry Andric       break; // All inputs enabled.
470*e8d8bef9SDimitry Andric 
471*e8d8bef9SDimitry Andric     bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
472*e8d8bef9SDimitry Andric     bool Changed = false;
473*e8d8bef9SDimitry Andric     for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
474*e8d8bef9SDimitry Andric       if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
475*e8d8bef9SDimitry Andric           (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
476*e8d8bef9SDimitry Andric         Value *Src = II.getArgOperand(I + 2);
477*e8d8bef9SDimitry Andric         if (!isa<UndefValue>(Src)) {
478*e8d8bef9SDimitry Andric           IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
479*e8d8bef9SDimitry Andric           Changed = true;
480*e8d8bef9SDimitry Andric         }
481*e8d8bef9SDimitry Andric       }
482*e8d8bef9SDimitry Andric     }
483*e8d8bef9SDimitry Andric 
484*e8d8bef9SDimitry Andric     if (Changed) {
485*e8d8bef9SDimitry Andric       return &II;
486*e8d8bef9SDimitry Andric     }
487*e8d8bef9SDimitry Andric 
488*e8d8bef9SDimitry Andric     break;
489*e8d8bef9SDimitry Andric   }
490*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fmed3: {
491*e8d8bef9SDimitry Andric     // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
492*e8d8bef9SDimitry Andric     // for the shader.
493*e8d8bef9SDimitry Andric 
494*e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
495*e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
496*e8d8bef9SDimitry Andric     Value *Src2 = II.getArgOperand(2);
497*e8d8bef9SDimitry Andric 
498*e8d8bef9SDimitry Andric     // Checking for NaN before canonicalization provides better fidelity when
499*e8d8bef9SDimitry Andric     // mapping other operations onto fmed3 since the order of operands is
500*e8d8bef9SDimitry Andric     // unchanged.
501*e8d8bef9SDimitry Andric     CallInst *NewCall = nullptr;
502*e8d8bef9SDimitry Andric     if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
503*e8d8bef9SDimitry Andric       NewCall = IC.Builder.CreateMinNum(Src1, Src2);
504*e8d8bef9SDimitry Andric     } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
505*e8d8bef9SDimitry Andric       NewCall = IC.Builder.CreateMinNum(Src0, Src2);
506*e8d8bef9SDimitry Andric     } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
507*e8d8bef9SDimitry Andric       NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
508*e8d8bef9SDimitry Andric     }
509*e8d8bef9SDimitry Andric 
510*e8d8bef9SDimitry Andric     if (NewCall) {
511*e8d8bef9SDimitry Andric       NewCall->copyFastMathFlags(&II);
512*e8d8bef9SDimitry Andric       NewCall->takeName(&II);
513*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, NewCall);
514*e8d8bef9SDimitry Andric     }
515*e8d8bef9SDimitry Andric 
516*e8d8bef9SDimitry Andric     bool Swap = false;
517*e8d8bef9SDimitry Andric     // Canonicalize constants to RHS operands.
518*e8d8bef9SDimitry Andric     //
519*e8d8bef9SDimitry Andric     // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
520*e8d8bef9SDimitry Andric     if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
521*e8d8bef9SDimitry Andric       std::swap(Src0, Src1);
522*e8d8bef9SDimitry Andric       Swap = true;
523*e8d8bef9SDimitry Andric     }
524*e8d8bef9SDimitry Andric 
525*e8d8bef9SDimitry Andric     if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
526*e8d8bef9SDimitry Andric       std::swap(Src1, Src2);
527*e8d8bef9SDimitry Andric       Swap = true;
528*e8d8bef9SDimitry Andric     }
529*e8d8bef9SDimitry Andric 
530*e8d8bef9SDimitry Andric     if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
531*e8d8bef9SDimitry Andric       std::swap(Src0, Src1);
532*e8d8bef9SDimitry Andric       Swap = true;
533*e8d8bef9SDimitry Andric     }
534*e8d8bef9SDimitry Andric 
535*e8d8bef9SDimitry Andric     if (Swap) {
536*e8d8bef9SDimitry Andric       II.setArgOperand(0, Src0);
537*e8d8bef9SDimitry Andric       II.setArgOperand(1, Src1);
538*e8d8bef9SDimitry Andric       II.setArgOperand(2, Src2);
539*e8d8bef9SDimitry Andric       return &II;
540*e8d8bef9SDimitry Andric     }
541*e8d8bef9SDimitry Andric 
542*e8d8bef9SDimitry Andric     if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
543*e8d8bef9SDimitry Andric       if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
544*e8d8bef9SDimitry Andric         if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
545*e8d8bef9SDimitry Andric           APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
546*e8d8bef9SDimitry Andric                                        C2->getValueAPF());
547*e8d8bef9SDimitry Andric           return IC.replaceInstUsesWith(
548*e8d8bef9SDimitry Andric               II, ConstantFP::get(IC.Builder.getContext(), Result));
549*e8d8bef9SDimitry Andric         }
550*e8d8bef9SDimitry Andric       }
551*e8d8bef9SDimitry Andric     }
552*e8d8bef9SDimitry Andric 
553*e8d8bef9SDimitry Andric     break;
554*e8d8bef9SDimitry Andric   }
555*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_icmp:
556*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fcmp: {
557*e8d8bef9SDimitry Andric     const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
558*e8d8bef9SDimitry Andric     // Guard against invalid arguments.
559*e8d8bef9SDimitry Andric     int64_t CCVal = CC->getZExtValue();
560*e8d8bef9SDimitry Andric     bool IsInteger = IID == Intrinsic::amdgcn_icmp;
561*e8d8bef9SDimitry Andric     if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
562*e8d8bef9SDimitry Andric                        CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
563*e8d8bef9SDimitry Andric         (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
564*e8d8bef9SDimitry Andric                         CCVal > CmpInst::LAST_FCMP_PREDICATE)))
565*e8d8bef9SDimitry Andric       break;
566*e8d8bef9SDimitry Andric 
567*e8d8bef9SDimitry Andric     Value *Src0 = II.getArgOperand(0);
568*e8d8bef9SDimitry Andric     Value *Src1 = II.getArgOperand(1);
569*e8d8bef9SDimitry Andric 
570*e8d8bef9SDimitry Andric     if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
571*e8d8bef9SDimitry Andric       if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
572*e8d8bef9SDimitry Andric         Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
573*e8d8bef9SDimitry Andric         if (CCmp->isNullValue()) {
574*e8d8bef9SDimitry Andric           return IC.replaceInstUsesWith(
575*e8d8bef9SDimitry Andric               II, ConstantExpr::getSExt(CCmp, II.getType()));
576*e8d8bef9SDimitry Andric         }
577*e8d8bef9SDimitry Andric 
578*e8d8bef9SDimitry Andric         // The result of V_ICMP/V_FCMP assembly instructions (which this
579*e8d8bef9SDimitry Andric         // intrinsic exposes) is one bit per thread, masked with the EXEC
580*e8d8bef9SDimitry Andric         // register (which contains the bitmask of live threads). So a
581*e8d8bef9SDimitry Andric         // comparison that always returns true is the same as a read of the
582*e8d8bef9SDimitry Andric         // EXEC register.
583*e8d8bef9SDimitry Andric         Function *NewF = Intrinsic::getDeclaration(
584*e8d8bef9SDimitry Andric             II.getModule(), Intrinsic::read_register, II.getType());
585*e8d8bef9SDimitry Andric         Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
586*e8d8bef9SDimitry Andric         MDNode *MD = MDNode::get(II.getContext(), MDArgs);
587*e8d8bef9SDimitry Andric         Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
588*e8d8bef9SDimitry Andric         CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
589*e8d8bef9SDimitry Andric         NewCall->addAttribute(AttributeList::FunctionIndex,
590*e8d8bef9SDimitry Andric                               Attribute::Convergent);
591*e8d8bef9SDimitry Andric         NewCall->takeName(&II);
592*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, NewCall);
593*e8d8bef9SDimitry Andric       }
594*e8d8bef9SDimitry Andric 
595*e8d8bef9SDimitry Andric       // Canonicalize constants to RHS.
596*e8d8bef9SDimitry Andric       CmpInst::Predicate SwapPred =
597*e8d8bef9SDimitry Andric           CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
598*e8d8bef9SDimitry Andric       II.setArgOperand(0, Src1);
599*e8d8bef9SDimitry Andric       II.setArgOperand(1, Src0);
600*e8d8bef9SDimitry Andric       II.setArgOperand(
601*e8d8bef9SDimitry Andric           2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
602*e8d8bef9SDimitry Andric       return &II;
603*e8d8bef9SDimitry Andric     }
604*e8d8bef9SDimitry Andric 
605*e8d8bef9SDimitry Andric     if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
606*e8d8bef9SDimitry Andric       break;
607*e8d8bef9SDimitry Andric 
608*e8d8bef9SDimitry Andric     // Canonicalize compare eq with true value to compare != 0
609*e8d8bef9SDimitry Andric     // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
610*e8d8bef9SDimitry Andric     //   -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
611*e8d8bef9SDimitry Andric     // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
612*e8d8bef9SDimitry Andric     //   -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
613*e8d8bef9SDimitry Andric     Value *ExtSrc;
614*e8d8bef9SDimitry Andric     if (CCVal == CmpInst::ICMP_EQ &&
615*e8d8bef9SDimitry Andric         ((match(Src1, PatternMatch::m_One()) &&
616*e8d8bef9SDimitry Andric           match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
617*e8d8bef9SDimitry Andric          (match(Src1, PatternMatch::m_AllOnes()) &&
618*e8d8bef9SDimitry Andric           match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
619*e8d8bef9SDimitry Andric         ExtSrc->getType()->isIntegerTy(1)) {
620*e8d8bef9SDimitry Andric       IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
621*e8d8bef9SDimitry Andric       IC.replaceOperand(II, 2,
622*e8d8bef9SDimitry Andric                         ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
623*e8d8bef9SDimitry Andric       return &II;
624*e8d8bef9SDimitry Andric     }
625*e8d8bef9SDimitry Andric 
626*e8d8bef9SDimitry Andric     CmpInst::Predicate SrcPred;
627*e8d8bef9SDimitry Andric     Value *SrcLHS;
628*e8d8bef9SDimitry Andric     Value *SrcRHS;
629*e8d8bef9SDimitry Andric 
630*e8d8bef9SDimitry Andric     // Fold compare eq/ne with 0 from a compare result as the predicate to the
631*e8d8bef9SDimitry Andric     // intrinsic. The typical use is a wave vote function in the library, which
632*e8d8bef9SDimitry Andric     // will be fed from a user code condition compared with 0. Fold in the
633*e8d8bef9SDimitry Andric     // redundant compare.
634*e8d8bef9SDimitry Andric 
635*e8d8bef9SDimitry Andric     // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
636*e8d8bef9SDimitry Andric     //   -> llvm.amdgcn.[if]cmp(a, b, pred)
637*e8d8bef9SDimitry Andric     //
638*e8d8bef9SDimitry Andric     // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
639*e8d8bef9SDimitry Andric     //   -> llvm.amdgcn.[if]cmp(a, b, inv pred)
640*e8d8bef9SDimitry Andric     if (match(Src1, PatternMatch::m_Zero()) &&
641*e8d8bef9SDimitry Andric         match(Src0, PatternMatch::m_ZExtOrSExt(
642*e8d8bef9SDimitry Andric                         m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
643*e8d8bef9SDimitry Andric                               PatternMatch::m_Value(SrcRHS))))) {
644*e8d8bef9SDimitry Andric       if (CCVal == CmpInst::ICMP_EQ)
645*e8d8bef9SDimitry Andric         SrcPred = CmpInst::getInversePredicate(SrcPred);
646*e8d8bef9SDimitry Andric 
647*e8d8bef9SDimitry Andric       Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
648*e8d8bef9SDimitry Andric                                  ? Intrinsic::amdgcn_fcmp
649*e8d8bef9SDimitry Andric                                  : Intrinsic::amdgcn_icmp;
650*e8d8bef9SDimitry Andric 
651*e8d8bef9SDimitry Andric       Type *Ty = SrcLHS->getType();
652*e8d8bef9SDimitry Andric       if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
653*e8d8bef9SDimitry Andric         // Promote to next legal integer type.
654*e8d8bef9SDimitry Andric         unsigned Width = CmpType->getBitWidth();
655*e8d8bef9SDimitry Andric         unsigned NewWidth = Width;
656*e8d8bef9SDimitry Andric 
657*e8d8bef9SDimitry Andric         // Don't do anything for i1 comparisons.
658*e8d8bef9SDimitry Andric         if (Width == 1)
659*e8d8bef9SDimitry Andric           break;
660*e8d8bef9SDimitry Andric 
661*e8d8bef9SDimitry Andric         if (Width <= 16)
662*e8d8bef9SDimitry Andric           NewWidth = 16;
663*e8d8bef9SDimitry Andric         else if (Width <= 32)
664*e8d8bef9SDimitry Andric           NewWidth = 32;
665*e8d8bef9SDimitry Andric         else if (Width <= 64)
666*e8d8bef9SDimitry Andric           NewWidth = 64;
667*e8d8bef9SDimitry Andric         else if (Width > 64)
668*e8d8bef9SDimitry Andric           break; // Can't handle this.
669*e8d8bef9SDimitry Andric 
670*e8d8bef9SDimitry Andric         if (Width != NewWidth) {
671*e8d8bef9SDimitry Andric           IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
672*e8d8bef9SDimitry Andric           if (CmpInst::isSigned(SrcPred)) {
673*e8d8bef9SDimitry Andric             SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
674*e8d8bef9SDimitry Andric             SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
675*e8d8bef9SDimitry Andric           } else {
676*e8d8bef9SDimitry Andric             SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
677*e8d8bef9SDimitry Andric             SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
678*e8d8bef9SDimitry Andric           }
679*e8d8bef9SDimitry Andric         }
680*e8d8bef9SDimitry Andric       } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
681*e8d8bef9SDimitry Andric         break;
682*e8d8bef9SDimitry Andric 
683*e8d8bef9SDimitry Andric       Function *NewF = Intrinsic::getDeclaration(
684*e8d8bef9SDimitry Andric           II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
685*e8d8bef9SDimitry Andric       Value *Args[] = {SrcLHS, SrcRHS,
686*e8d8bef9SDimitry Andric                        ConstantInt::get(CC->getType(), SrcPred)};
687*e8d8bef9SDimitry Andric       CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
688*e8d8bef9SDimitry Andric       NewCall->takeName(&II);
689*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, NewCall);
690*e8d8bef9SDimitry Andric     }
691*e8d8bef9SDimitry Andric 
692*e8d8bef9SDimitry Andric     break;
693*e8d8bef9SDimitry Andric   }
694*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_ballot: {
695*e8d8bef9SDimitry Andric     if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
696*e8d8bef9SDimitry Andric       if (Src->isZero()) {
697*e8d8bef9SDimitry Andric         // amdgcn.ballot(i1 0) is zero.
698*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
699*e8d8bef9SDimitry Andric       }
700*e8d8bef9SDimitry Andric 
701*e8d8bef9SDimitry Andric       if (Src->isOne()) {
702*e8d8bef9SDimitry Andric         // amdgcn.ballot(i1 1) is exec.
703*e8d8bef9SDimitry Andric         const char *RegName = "exec";
704*e8d8bef9SDimitry Andric         if (II.getType()->isIntegerTy(32))
705*e8d8bef9SDimitry Andric           RegName = "exec_lo";
706*e8d8bef9SDimitry Andric         else if (!II.getType()->isIntegerTy(64))
707*e8d8bef9SDimitry Andric           break;
708*e8d8bef9SDimitry Andric 
709*e8d8bef9SDimitry Andric         Function *NewF = Intrinsic::getDeclaration(
710*e8d8bef9SDimitry Andric             II.getModule(), Intrinsic::read_register, II.getType());
711*e8d8bef9SDimitry Andric         Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
712*e8d8bef9SDimitry Andric         MDNode *MD = MDNode::get(II.getContext(), MDArgs);
713*e8d8bef9SDimitry Andric         Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
714*e8d8bef9SDimitry Andric         CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
715*e8d8bef9SDimitry Andric         NewCall->addAttribute(AttributeList::FunctionIndex,
716*e8d8bef9SDimitry Andric                               Attribute::Convergent);
717*e8d8bef9SDimitry Andric         NewCall->takeName(&II);
718*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, NewCall);
719*e8d8bef9SDimitry Andric       }
720*e8d8bef9SDimitry Andric     }
721*e8d8bef9SDimitry Andric     break;
722*e8d8bef9SDimitry Andric   }
723*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_wqm_vote: {
724*e8d8bef9SDimitry Andric     // wqm_vote is identity when the argument is constant.
725*e8d8bef9SDimitry Andric     if (!isa<Constant>(II.getArgOperand(0)))
726*e8d8bef9SDimitry Andric       break;
727*e8d8bef9SDimitry Andric 
728*e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, II.getArgOperand(0));
729*e8d8bef9SDimitry Andric   }
730*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_kill: {
731*e8d8bef9SDimitry Andric     const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
732*e8d8bef9SDimitry Andric     if (!C || !C->getZExtValue())
733*e8d8bef9SDimitry Andric       break;
734*e8d8bef9SDimitry Andric 
735*e8d8bef9SDimitry Andric     // amdgcn.kill(i1 1) is a no-op
736*e8d8bef9SDimitry Andric     return IC.eraseInstFromFunction(II);
737*e8d8bef9SDimitry Andric   }
738*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_update_dpp: {
739*e8d8bef9SDimitry Andric     Value *Old = II.getArgOperand(0);
740*e8d8bef9SDimitry Andric 
741*e8d8bef9SDimitry Andric     auto *BC = cast<ConstantInt>(II.getArgOperand(5));
742*e8d8bef9SDimitry Andric     auto *RM = cast<ConstantInt>(II.getArgOperand(3));
743*e8d8bef9SDimitry Andric     auto *BM = cast<ConstantInt>(II.getArgOperand(4));
744*e8d8bef9SDimitry Andric     if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
745*e8d8bef9SDimitry Andric         BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
746*e8d8bef9SDimitry Andric       break;
747*e8d8bef9SDimitry Andric 
748*e8d8bef9SDimitry Andric     // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
749*e8d8bef9SDimitry Andric     return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
750*e8d8bef9SDimitry Andric   }
751*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_permlane16:
752*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_permlanex16: {
753*e8d8bef9SDimitry Andric     // Discard vdst_in if it's not going to be read.
754*e8d8bef9SDimitry Andric     Value *VDstIn = II.getArgOperand(0);
755*e8d8bef9SDimitry Andric     if (isa<UndefValue>(VDstIn))
756*e8d8bef9SDimitry Andric       break;
757*e8d8bef9SDimitry Andric 
758*e8d8bef9SDimitry Andric     ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
759*e8d8bef9SDimitry Andric     ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
760*e8d8bef9SDimitry Andric     if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
761*e8d8bef9SDimitry Andric       break;
762*e8d8bef9SDimitry Andric 
763*e8d8bef9SDimitry Andric     return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
764*e8d8bef9SDimitry Andric   }
765*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_readfirstlane:
766*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_readlane: {
767*e8d8bef9SDimitry Andric     // A constant value is trivially uniform.
768*e8d8bef9SDimitry Andric     if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
769*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, C);
770*e8d8bef9SDimitry Andric     }
771*e8d8bef9SDimitry Andric 
772*e8d8bef9SDimitry Andric     // The rest of these may not be safe if the exec may not be the same between
773*e8d8bef9SDimitry Andric     // the def and use.
774*e8d8bef9SDimitry Andric     Value *Src = II.getArgOperand(0);
775*e8d8bef9SDimitry Andric     Instruction *SrcInst = dyn_cast<Instruction>(Src);
776*e8d8bef9SDimitry Andric     if (SrcInst && SrcInst->getParent() != II.getParent())
777*e8d8bef9SDimitry Andric       break;
778*e8d8bef9SDimitry Andric 
779*e8d8bef9SDimitry Andric     // readfirstlane (readfirstlane x) -> readfirstlane x
780*e8d8bef9SDimitry Andric     // readlane (readfirstlane x), y -> readfirstlane x
781*e8d8bef9SDimitry Andric     if (match(Src,
782*e8d8bef9SDimitry Andric               PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
783*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Src);
784*e8d8bef9SDimitry Andric     }
785*e8d8bef9SDimitry Andric 
786*e8d8bef9SDimitry Andric     if (IID == Intrinsic::amdgcn_readfirstlane) {
787*e8d8bef9SDimitry Andric       // readfirstlane (readlane x, y) -> readlane x, y
788*e8d8bef9SDimitry Andric       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
789*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Src);
790*e8d8bef9SDimitry Andric       }
791*e8d8bef9SDimitry Andric     } else {
792*e8d8bef9SDimitry Andric       // readlane (readlane x, y), y -> readlane x, y
793*e8d8bef9SDimitry Andric       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
794*e8d8bef9SDimitry Andric                          PatternMatch::m_Value(),
795*e8d8bef9SDimitry Andric                          PatternMatch::m_Specific(II.getArgOperand(1))))) {
796*e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Src);
797*e8d8bef9SDimitry Andric       }
798*e8d8bef9SDimitry Andric     }
799*e8d8bef9SDimitry Andric 
800*e8d8bef9SDimitry Andric     break;
801*e8d8bef9SDimitry Andric   }
802*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_ldexp: {
803*e8d8bef9SDimitry Andric     // FIXME: This doesn't introduce new instructions and belongs in
804*e8d8bef9SDimitry Andric     // InstructionSimplify.
805*e8d8bef9SDimitry Andric     Type *Ty = II.getType();
806*e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
807*e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
808*e8d8bef9SDimitry Andric 
809*e8d8bef9SDimitry Andric     // Folding undef to qnan is safe regardless of the FP mode.
810*e8d8bef9SDimitry Andric     if (isa<UndefValue>(Op0)) {
811*e8d8bef9SDimitry Andric       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
812*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, QNaN);
813*e8d8bef9SDimitry Andric     }
814*e8d8bef9SDimitry Andric 
815*e8d8bef9SDimitry Andric     const APFloat *C = nullptr;
816*e8d8bef9SDimitry Andric     match(Op0, PatternMatch::m_APFloat(C));
817*e8d8bef9SDimitry Andric 
818*e8d8bef9SDimitry Andric     // FIXME: Should flush denorms depending on FP mode, but that's ignored
819*e8d8bef9SDimitry Andric     // everywhere else.
820*e8d8bef9SDimitry Andric     //
821*e8d8bef9SDimitry Andric     // These cases should be safe, even with strictfp.
822*e8d8bef9SDimitry Andric     // ldexp(0.0, x) -> 0.0
823*e8d8bef9SDimitry Andric     // ldexp(-0.0, x) -> -0.0
824*e8d8bef9SDimitry Andric     // ldexp(inf, x) -> inf
825*e8d8bef9SDimitry Andric     // ldexp(-inf, x) -> -inf
826*e8d8bef9SDimitry Andric     if (C && (C->isZero() || C->isInfinity())) {
827*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Op0);
828*e8d8bef9SDimitry Andric     }
829*e8d8bef9SDimitry Andric 
830*e8d8bef9SDimitry Andric     // With strictfp, be more careful about possibly needing to flush denormals
831*e8d8bef9SDimitry Andric     // or not, and snan behavior depends on ieee_mode.
832*e8d8bef9SDimitry Andric     if (II.isStrictFP())
833*e8d8bef9SDimitry Andric       break;
834*e8d8bef9SDimitry Andric 
835*e8d8bef9SDimitry Andric     if (C && C->isNaN()) {
836*e8d8bef9SDimitry Andric       // FIXME: We just need to make the nan quiet here, but that's unavailable
837*e8d8bef9SDimitry Andric       // on APFloat, only IEEEfloat
838*e8d8bef9SDimitry Andric       auto *Quieted =
839*e8d8bef9SDimitry Andric           ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
840*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Quieted);
841*e8d8bef9SDimitry Andric     }
842*e8d8bef9SDimitry Andric 
843*e8d8bef9SDimitry Andric     // ldexp(x, 0) -> x
844*e8d8bef9SDimitry Andric     // ldexp(x, undef) -> x
845*e8d8bef9SDimitry Andric     if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
846*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Op0);
847*e8d8bef9SDimitry Andric     }
848*e8d8bef9SDimitry Andric 
849*e8d8bef9SDimitry Andric     break;
850*e8d8bef9SDimitry Andric   }
851*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fmul_legacy: {
852*e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
853*e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
854*e8d8bef9SDimitry Andric 
855*e8d8bef9SDimitry Andric     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
856*e8d8bef9SDimitry Andric     // infinity, gives +0.0.
857*e8d8bef9SDimitry Andric     // TODO: Move to InstSimplify?
858*e8d8bef9SDimitry Andric     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
859*e8d8bef9SDimitry Andric         match(Op1, PatternMatch::m_AnyZeroFP()))
860*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
861*e8d8bef9SDimitry Andric 
862*e8d8bef9SDimitry Andric     // If we can prove we don't have one of the special cases then we can use a
863*e8d8bef9SDimitry Andric     // normal fmul instruction instead.
864*e8d8bef9SDimitry Andric     if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
865*e8d8bef9SDimitry Andric       auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
866*e8d8bef9SDimitry Andric       FMul->takeName(&II);
867*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, FMul);
868*e8d8bef9SDimitry Andric     }
869*e8d8bef9SDimitry Andric     break;
870*e8d8bef9SDimitry Andric   }
871*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_fma_legacy: {
872*e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
873*e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
874*e8d8bef9SDimitry Andric     Value *Op2 = II.getArgOperand(2);
875*e8d8bef9SDimitry Andric 
876*e8d8bef9SDimitry Andric     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
877*e8d8bef9SDimitry Andric     // infinity, gives +0.0.
878*e8d8bef9SDimitry Andric     // TODO: Move to InstSimplify?
879*e8d8bef9SDimitry Andric     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
880*e8d8bef9SDimitry Andric         match(Op1, PatternMatch::m_AnyZeroFP())) {
881*e8d8bef9SDimitry Andric       // It's tempting to just return Op2 here, but that would give the wrong
882*e8d8bef9SDimitry Andric       // result if Op2 was -0.0.
883*e8d8bef9SDimitry Andric       auto *Zero = ConstantFP::getNullValue(II.getType());
884*e8d8bef9SDimitry Andric       auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
885*e8d8bef9SDimitry Andric       FAdd->takeName(&II);
886*e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, FAdd);
887*e8d8bef9SDimitry Andric     }
888*e8d8bef9SDimitry Andric 
889*e8d8bef9SDimitry Andric     // If we can prove we don't have one of the special cases then we can use a
890*e8d8bef9SDimitry Andric     // normal fma instead.
891*e8d8bef9SDimitry Andric     if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
892*e8d8bef9SDimitry Andric       II.setCalledOperand(Intrinsic::getDeclaration(
893*e8d8bef9SDimitry Andric           II.getModule(), Intrinsic::fma, II.getType()));
894*e8d8bef9SDimitry Andric       return &II;
895*e8d8bef9SDimitry Andric     }
896*e8d8bef9SDimitry Andric     break;
897*e8d8bef9SDimitry Andric   }
898*e8d8bef9SDimitry Andric   default: {
899*e8d8bef9SDimitry Andric     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
900*e8d8bef9SDimitry Andric             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
901*e8d8bef9SDimitry Andric       return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
902*e8d8bef9SDimitry Andric     }
903*e8d8bef9SDimitry Andric   }
904*e8d8bef9SDimitry Andric   }
905*e8d8bef9SDimitry Andric   return None;
906*e8d8bef9SDimitry Andric }
907*e8d8bef9SDimitry Andric 
908*e8d8bef9SDimitry Andric /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
909*e8d8bef9SDimitry Andric ///
910*e8d8bef9SDimitry Andric /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
911*e8d8bef9SDimitry Andric ///       struct returns.
912*e8d8bef9SDimitry Andric static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
913*e8d8bef9SDimitry Andric                                                     IntrinsicInst &II,
914*e8d8bef9SDimitry Andric                                                     APInt DemandedElts,
915*e8d8bef9SDimitry Andric                                                     int DMaskIdx = -1) {
916*e8d8bef9SDimitry Andric 
917*e8d8bef9SDimitry Andric   auto *IIVTy = cast<FixedVectorType>(II.getType());
918*e8d8bef9SDimitry Andric   unsigned VWidth = IIVTy->getNumElements();
919*e8d8bef9SDimitry Andric   if (VWidth == 1)
920*e8d8bef9SDimitry Andric     return nullptr;
921*e8d8bef9SDimitry Andric 
922*e8d8bef9SDimitry Andric   IRBuilderBase::InsertPointGuard Guard(IC.Builder);
923*e8d8bef9SDimitry Andric   IC.Builder.SetInsertPoint(&II);
924*e8d8bef9SDimitry Andric 
925*e8d8bef9SDimitry Andric   // Assume the arguments are unchanged and later override them, if needed.
926*e8d8bef9SDimitry Andric   SmallVector<Value *, 16> Args(II.args());
927*e8d8bef9SDimitry Andric 
928*e8d8bef9SDimitry Andric   if (DMaskIdx < 0) {
929*e8d8bef9SDimitry Andric     // Buffer case.
930*e8d8bef9SDimitry Andric 
931*e8d8bef9SDimitry Andric     const unsigned ActiveBits = DemandedElts.getActiveBits();
932*e8d8bef9SDimitry Andric     const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
933*e8d8bef9SDimitry Andric 
934*e8d8bef9SDimitry Andric     // Start assuming the prefix of elements is demanded, but possibly clear
935*e8d8bef9SDimitry Andric     // some other bits if there are trailing zeros (unused components at front)
936*e8d8bef9SDimitry Andric     // and update offset.
937*e8d8bef9SDimitry Andric     DemandedElts = (1 << ActiveBits) - 1;
938*e8d8bef9SDimitry Andric 
939*e8d8bef9SDimitry Andric     if (UnusedComponentsAtFront > 0) {
940*e8d8bef9SDimitry Andric       static const unsigned InvalidOffsetIdx = 0xf;
941*e8d8bef9SDimitry Andric 
942*e8d8bef9SDimitry Andric       unsigned OffsetIdx;
943*e8d8bef9SDimitry Andric       switch (II.getIntrinsicID()) {
944*e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_raw_buffer_load:
945*e8d8bef9SDimitry Andric         OffsetIdx = 1;
946*e8d8bef9SDimitry Andric         break;
947*e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_s_buffer_load:
948*e8d8bef9SDimitry Andric         // If resulting type is vec3, there is no point in trimming the
949*e8d8bef9SDimitry Andric         // load with updated offset, as the vec3 would most likely be widened to
950*e8d8bef9SDimitry Andric         // vec4 anyway during lowering.
951*e8d8bef9SDimitry Andric         if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
952*e8d8bef9SDimitry Andric           OffsetIdx = InvalidOffsetIdx;
953*e8d8bef9SDimitry Andric         else
954*e8d8bef9SDimitry Andric           OffsetIdx = 1;
955*e8d8bef9SDimitry Andric         break;
956*e8d8bef9SDimitry Andric       case Intrinsic::amdgcn_struct_buffer_load:
957*e8d8bef9SDimitry Andric         OffsetIdx = 2;
958*e8d8bef9SDimitry Andric         break;
959*e8d8bef9SDimitry Andric       default:
960*e8d8bef9SDimitry Andric         // TODO: handle tbuffer* intrinsics.
961*e8d8bef9SDimitry Andric         OffsetIdx = InvalidOffsetIdx;
962*e8d8bef9SDimitry Andric         break;
963*e8d8bef9SDimitry Andric       }
964*e8d8bef9SDimitry Andric 
965*e8d8bef9SDimitry Andric       if (OffsetIdx != InvalidOffsetIdx) {
966*e8d8bef9SDimitry Andric         // Clear demanded bits and update the offset.
967*e8d8bef9SDimitry Andric         DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
968*e8d8bef9SDimitry Andric         auto *Offset = II.getArgOperand(OffsetIdx);
969*e8d8bef9SDimitry Andric         unsigned SingleComponentSizeInBits =
970*e8d8bef9SDimitry Andric             IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
971*e8d8bef9SDimitry Andric         unsigned OffsetAdd =
972*e8d8bef9SDimitry Andric             UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
973*e8d8bef9SDimitry Andric         auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
974*e8d8bef9SDimitry Andric         Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
975*e8d8bef9SDimitry Andric       }
976*e8d8bef9SDimitry Andric     }
977*e8d8bef9SDimitry Andric   } else {
978*e8d8bef9SDimitry Andric     // Image case.
979*e8d8bef9SDimitry Andric 
980*e8d8bef9SDimitry Andric     ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
981*e8d8bef9SDimitry Andric     unsigned DMaskVal = DMask->getZExtValue() & 0xf;
982*e8d8bef9SDimitry Andric 
983*e8d8bef9SDimitry Andric     // Mask off values that are undefined because the dmask doesn't cover them
984*e8d8bef9SDimitry Andric     DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
985*e8d8bef9SDimitry Andric 
986*e8d8bef9SDimitry Andric     unsigned NewDMaskVal = 0;
987*e8d8bef9SDimitry Andric     unsigned OrigLoadIdx = 0;
988*e8d8bef9SDimitry Andric     for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
989*e8d8bef9SDimitry Andric       const unsigned Bit = 1 << SrcIdx;
990*e8d8bef9SDimitry Andric       if (!!(DMaskVal & Bit)) {
991*e8d8bef9SDimitry Andric         if (!!DemandedElts[OrigLoadIdx])
992*e8d8bef9SDimitry Andric           NewDMaskVal |= Bit;
993*e8d8bef9SDimitry Andric         OrigLoadIdx++;
994*e8d8bef9SDimitry Andric       }
995*e8d8bef9SDimitry Andric     }
996*e8d8bef9SDimitry Andric 
997*e8d8bef9SDimitry Andric     if (DMaskVal != NewDMaskVal)
998*e8d8bef9SDimitry Andric       Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
999*e8d8bef9SDimitry Andric   }
1000*e8d8bef9SDimitry Andric 
1001*e8d8bef9SDimitry Andric   unsigned NewNumElts = DemandedElts.countPopulation();
1002*e8d8bef9SDimitry Andric   if (!NewNumElts)
1003*e8d8bef9SDimitry Andric     return UndefValue::get(II.getType());
1004*e8d8bef9SDimitry Andric 
1005*e8d8bef9SDimitry Andric   if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1006*e8d8bef9SDimitry Andric     if (DMaskIdx >= 0)
1007*e8d8bef9SDimitry Andric       II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1008*e8d8bef9SDimitry Andric     return nullptr;
1009*e8d8bef9SDimitry Andric   }
1010*e8d8bef9SDimitry Andric 
1011*e8d8bef9SDimitry Andric   // Validate function argument and return types, extracting overloaded types
1012*e8d8bef9SDimitry Andric   // along the way.
1013*e8d8bef9SDimitry Andric   SmallVector<Type *, 6> OverloadTys;
1014*e8d8bef9SDimitry Andric   if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
1015*e8d8bef9SDimitry Andric     return nullptr;
1016*e8d8bef9SDimitry Andric 
1017*e8d8bef9SDimitry Andric   Module *M = II.getParent()->getParent()->getParent();
1018*e8d8bef9SDimitry Andric   Type *EltTy = IIVTy->getElementType();
1019*e8d8bef9SDimitry Andric   Type *NewTy =
1020*e8d8bef9SDimitry Andric       (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
1021*e8d8bef9SDimitry Andric 
1022*e8d8bef9SDimitry Andric   OverloadTys[0] = NewTy;
1023*e8d8bef9SDimitry Andric   Function *NewIntrin =
1024*e8d8bef9SDimitry Andric       Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
1025*e8d8bef9SDimitry Andric 
1026*e8d8bef9SDimitry Andric   CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
1027*e8d8bef9SDimitry Andric   NewCall->takeName(&II);
1028*e8d8bef9SDimitry Andric   NewCall->copyMetadata(II);
1029*e8d8bef9SDimitry Andric 
1030*e8d8bef9SDimitry Andric   if (NewNumElts == 1) {
1031*e8d8bef9SDimitry Andric     return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
1032*e8d8bef9SDimitry Andric                                           NewCall,
1033*e8d8bef9SDimitry Andric                                           DemandedElts.countTrailingZeros());
1034*e8d8bef9SDimitry Andric   }
1035*e8d8bef9SDimitry Andric 
1036*e8d8bef9SDimitry Andric   SmallVector<int, 8> EltMask;
1037*e8d8bef9SDimitry Andric   unsigned NewLoadIdx = 0;
1038*e8d8bef9SDimitry Andric   for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1039*e8d8bef9SDimitry Andric     if (!!DemandedElts[OrigLoadIdx])
1040*e8d8bef9SDimitry Andric       EltMask.push_back(NewLoadIdx++);
1041*e8d8bef9SDimitry Andric     else
1042*e8d8bef9SDimitry Andric       EltMask.push_back(NewNumElts);
1043*e8d8bef9SDimitry Andric   }
1044*e8d8bef9SDimitry Andric 
1045*e8d8bef9SDimitry Andric   Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1046*e8d8bef9SDimitry Andric 
1047*e8d8bef9SDimitry Andric   return Shuffle;
1048*e8d8bef9SDimitry Andric }
1049*e8d8bef9SDimitry Andric 
1050*e8d8bef9SDimitry Andric Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
1051*e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1052*e8d8bef9SDimitry Andric     APInt &UndefElts2, APInt &UndefElts3,
1053*e8d8bef9SDimitry Andric     std::function<void(Instruction *, unsigned, APInt, APInt &)>
1054*e8d8bef9SDimitry Andric         SimplifyAndSetOp) const {
1055*e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
1056*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_buffer_load:
1057*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_buffer_load_format:
1058*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_load:
1059*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_load_format:
1060*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_raw_tbuffer_load:
1061*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_s_buffer_load:
1062*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_buffer_load:
1063*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_buffer_load_format:
1064*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_struct_tbuffer_load:
1065*e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_tbuffer_load:
1066*e8d8bef9SDimitry Andric     return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
1067*e8d8bef9SDimitry Andric   default: {
1068*e8d8bef9SDimitry Andric     if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
1069*e8d8bef9SDimitry Andric       return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
1070*e8d8bef9SDimitry Andric     }
1071*e8d8bef9SDimitry Andric     break;
1072*e8d8bef9SDimitry Andric   }
1073*e8d8bef9SDimitry Andric   }
1074*e8d8bef9SDimitry Andric   return None;
1075*e8d8bef9SDimitry Andric }
1076