//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "R600Subtarget.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;

#define DEBUG_TYPE "AMDGPUtti"

namespace {

struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"

} // end anonymous namespace

// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
//
// A single NaN input is folded to minnum, so we rely on that folding for
// handling NaNs.
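//
// For example (illustrative values): fmed3(1.0, 4.0, 2.0) returns 2.0. The
// overall maximum operand (4.0) is identified first, and the result is the
// maxnum of the remaining two operands.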
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
                           const APFloat &Src2) {
  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);

  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp0 == APFloat::cmpEqual)
    return maxnum(Src1, Src2);

  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp1 == APFloat::cmpEqual)
    return maxnum(Src0, Src2);

  return maxnum(Src0, Src1);
}

// Check if a value can be converted to a 16-bit value without losing
// precision.
static bool canSafelyConvertTo16Bit(Value &V) {
  Type *VTy = V.getType();
  if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
    // The value is already 16-bit, so we don't want to convert to 16-bit again!
    return false;
  }
  if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
    // We need to check that if we cast the value down to a half, we do not
    // lose precision.
    APFloat FloatValue(ConstFloat->getValueAPF());
    bool LosesInfo = true;
    FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
    return !LosesInfo;
  }
  Value *CastSrc;
  if (match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) ||
      match(&V, m_SExt(PatternMatch::m_Value(CastSrc))) ||
      match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)))) {
    Type *CastSrcTy = CastSrc->getType();
    if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
      return true;
  }

  return false;
}
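
// Illustrative example (hypothetical IR): a coordinate defined as
//   %c = fpext half %h to float
// passes canSafelyConvertTo16Bit, and convertTo16Bit below simply returns %h.
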
// Convert a value to 16-bit.
static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
  Type *VTy = V.getType();
  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
    return cast<Instruction>(&V)->getOperand(0);
  if (VTy->isIntegerTy())
    return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
  if (VTy->isFloatingPointTy())
    return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));

  llvm_unreachable("Should never be called!");
}

static Optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
  if (!ST->hasA16() && !ST->hasG16())
    return None;

  bool FloatCoord = false;
  // true means derivatives can be converted to 16 bit, coordinates not
  bool OnlyDerivatives = false;

  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
    Value *Coord = II.getOperand(OperandIndex);
    // If the values are not derived from 16-bit values, we cannot optimize.
    if (!canSafelyConvertTo16Bit(*Coord)) {
      if (OperandIndex < ImageDimIntr->CoordStart ||
          ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
        return None;
      }
      // All gradients can be converted, so convert only them
      OnlyDerivatives = true;
      break;
    }

    assert(OperandIndex == ImageDimIntr->GradientStart ||
           FloatCoord == Coord->getType()->isFloatingPointTy());
    FloatCoord = Coord->getType()->isFloatingPointTy();
  }

  if (OnlyDerivatives) {
    if (!ST->hasG16())
      return None;
  } else {
    if (!ST->hasA16())
      OnlyDerivatives = true; // Only supports G16
  }

  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());

  SmallVector<Type *, 4> ArgTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
    return None;

  ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
  if (!OnlyDerivatives)
    ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
  Function *I =
      Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);

  SmallVector<Value *, 8> Args(II.arg_operands());

  unsigned EndIndex =
      OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < EndIndex; OperandIndex++) {
    Args[OperandIndex] =
        convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
  }

  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
  NewCall->takeName(&II);
  NewCall->copyMetadata(II);
  if (isa<FPMathOperator>(NewCall))
    NewCall->copyFastMathFlags(&II);
  return IC.replaceInstUsesWith(II, NewCall);
}
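
// Illustrative rewrite performed above (signatures abbreviated, hypothetical
// IR): on a subtarget with A16, a sample whose coordinates are fpext'd halves,
//   call @llvm.amdgcn.image.sample.2d.v4f32.f32(..., float %u, float %v, ...)
// is replaced by the half-coordinate variant,
//   call @llvm.amdgcn.image.sample.2d.v4f32.f16(..., half %uh, half %vh, ...)
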
bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                           InstCombiner &IC) const {
  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
  // infinity, gives +0.0. If we can prove we don't have one of the special
  // cases then we can use a normal multiply instead.
  // TODO: Create and use isKnownFiniteNonZero instead of just matching
  // constants here.
  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
      match(Op1, PatternMatch::m_FiniteNonZero())) {
    // One operand is not zero or infinity or NaN.
    return true;
  }
  auto *TLI = &IC.getTargetLibraryInfo();
  if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
      isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
    // Neither operand is infinity or NaN.
    return true;
  }
  return false;
}

Optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::amdgcn_rcp: {
    Value *Src = II.getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    if (II.isStrictFP())
      break;

    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1);
      Val.divide(ArgVal, APFloat::rmNearestTiesToEven);

      // This is more precise than the instruction may give.
      //
      // TODO: The instruction always flushes denormal results (except for
      // f16), should this also?
      return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
    }

    break;
  }
  case Intrinsic::amdgcn_rsq: {
    Value *Src = II.getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    break;
  }
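  // Illustrative values for the frexp folds below: frexp_mant(8.0) folds to
  // 0.5 and frexp_exp(8.0) to 4, since 8.0 == 0.5 * 2^4.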
  case Intrinsic::amdgcn_frexp_mant:
  case Intrinsic::amdgcn_frexp_exp: {
    Value *Src = II.getArgOperand(0);
    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      int Exp;
      APFloat Significand =
          frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);

      if (IID == Intrinsic::amdgcn_frexp_mant) {
        return IC.replaceInstUsesWith(
            II, ConstantFP::get(II.getContext(), Significand));
      }

      // Match instruction special case behavior.
      if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
        Exp = 0;

      return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
    }

    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_class: {
    enum {
      S_NAN = 1 << 0,       // Signaling NaN
      Q_NAN = 1 << 1,       // Quiet NaN
      N_INFINITY = 1 << 2,  // Negative infinity
      N_NORMAL = 1 << 3,    // Negative normal
      N_SUBNORMAL = 1 << 4, // Negative subnormal
      N_ZERO = 1 << 5,      // Negative zero
      P_ZERO = 1 << 6,      // Positive zero
      P_SUBNORMAL = 1 << 7, // Positive subnormal
      P_NORMAL = 1 << 8,    // Positive normal
      P_INFINITY = 1 << 9   // Positive infinity
    };

    const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
                              N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL |
                              P_NORMAL | P_INFINITY;

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    if (!CMask) {
      if (isa<UndefValue>(Src0)) {
        return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
      }

      if (isa<UndefValue>(Src1)) {
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), false));
      }
      break;
    }

    uint32_t Mask = CMask->getZExtValue();

    // If all tests are made, it doesn't matter what the value is.
    if ((Mask & FullMask) == FullMask) {
      return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
    }

    if ((Mask & FullMask) == 0) {
      return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
    }

    if (Mask == (S_NAN | Q_NAN)) {
      // Equivalent of isnan. Replace with standard fcmp.
      Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
      FCmp->takeName(&II);
      return IC.replaceInstUsesWith(II, FCmp);
    }
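
    // Illustrative IR for the isnan fold above (hypothetical values):
    //   %r = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) ; S_NAN | Q_NAN
    // becomes
    //   %r = fcmp uno float %x, %x
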
    if (Mask == (N_ZERO | P_ZERO)) {
      // Equivalent of == 0.
      Value *FCmp =
          IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));

      FCmp->takeName(&II);
      return IC.replaceInstUsesWith(II, FCmp);
    }

    // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
    if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
        isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
      return IC.replaceOperand(
          II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
    }

    const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
    if (!CVal) {
      if (isa<UndefValue>(Src0)) {
        return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
      }

      // Clamp mask to used bits
      if ((Mask & FullMask) != Mask) {
        CallInst *NewCall = IC.Builder.CreateCall(
            II.getCalledFunction(),
            {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});

        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }

      break;
    }

    const APFloat &Val = CVal->getValueAPF();

    bool Result =
        ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
        ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
        ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
        ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
        ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
        ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
        ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
        ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
        ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
        ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());

    return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
  }
  case Intrinsic::amdgcn_cvt_pkrtz: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        const fltSemantics &HalfSem =
            II.getType()->getScalarType()->getFltSemantics();
        bool LosesInfo;
        APFloat Val0 = C0->getValueAPF();
        APFloat Val1 = C1->getValueAPF();
        Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
        Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);

        Constant *Folded =
            ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
                                 ConstantFP::get(II.getContext(), Val1)});
        return IC.replaceInstUsesWith(II, Folded);
      }
    }

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_ubfe:
  case Intrinsic::amdgcn_sbfe: {
    // Decompose simple cases into standard shifts.
    Value *Src = II.getArgOperand(0);
    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, Src);
    }

    unsigned Width;
    Type *Ty = II.getType();
    unsigned IntSize = Ty->getIntegerBitWidth();

    ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CWidth) {
      Width = CWidth->getZExtValue();
      if ((Width & (IntSize - 1)) == 0) {
        return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
      }
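
      // Illustrative i32 cases (hypothetical constants): a width of 32 hits
      // the fold above, since 32 & 31 == 0, while a width of 40 is
      // canonicalized to 8 below.
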
      // Hardware ignores high bits, so remove those.
      if (Width >= IntSize) {
        return IC.replaceOperand(
            II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
      }
    }

    unsigned Offset;
    ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
    if (COffset) {
      Offset = COffset->getZExtValue();
      if (Offset >= IntSize) {
        return IC.replaceOperand(
            II, 1,
            ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
      }
    }

    bool Signed = IID == Intrinsic::amdgcn_sbfe;

    if (!CWidth || !COffset)
      break;

    // The case of Width == 0 is handled above, which makes this transformation
    // safe. If Width == 0, then the ashr and lshr instructions become poison
    // values since the shift amount would be equal to the bit size.
    assert(Width != 0);

    // TODO: This allows folding to undef when the hardware has specific
    // behavior?
    if (Offset + Width < IntSize) {
      Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
      Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
                                 : IC.Builder.CreateLShr(Shl, IntSize - Width);
      RightShift->takeName(&II);
      return IC.replaceInstUsesWith(II, RightShift);
    }

    Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
                               : IC.Builder.CreateLShr(Src, Offset);

    RightShift->takeName(&II);
    return IC.replaceInstUsesWith(II, RightShift);
  }
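  // Illustrative decomposition above (i32, hypothetical constants):
  //   llvm.amdgcn.ubfe(i32 %x, i32 4, i32 8) -> lshr (shl %x, 20), 24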
  case Intrinsic::amdgcn_exp:
  case Intrinsic::amdgcn_exp_compr: {
    ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
    unsigned EnBits = En->getZExtValue();
    if (EnBits == 0xf)
      break; // All inputs enabled.

    bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
    bool Changed = false;
    for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
      if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
          (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
        Value *Src = II.getArgOperand(I + 2);
        if (!isa<UndefValue>(Src)) {
          IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
          Changed = true;
        }
      }
    }

    if (Changed) {
      return &II;
    }

    break;
  }
  case Intrinsic::amdgcn_fmed3: {
    // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
    // for the shader.

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    Value *Src2 = II.getArgOperand(2);

    // Checking for NaN before canonicalization provides better fidelity when
    // mapping other operations onto fmed3 since the order of operands is
    // unchanged.
    CallInst *NewCall = nullptr;
    if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
      NewCall = IC.Builder.CreateMinNum(Src1, Src2);
    } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
      NewCall = IC.Builder.CreateMinNum(Src0, Src2);
    } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
      NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
    }

    if (NewCall) {
      NewCall->copyFastMathFlags(&II);
      NewCall->takeName(&II);
      return IC.replaceInstUsesWith(II, NewCall);
    }

    bool Swap = false;
    // Canonicalize constants to RHS operands.
    //
    // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }

    if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
      std::swap(Src1, Src2);
      Swap = true;
    }

    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }

    if (Swap) {
      II.setArgOperand(0, Src0);
      II.setArgOperand(1, Src1);
      II.setArgOperand(2, Src2);
      return &II;
    }

    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
          APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
                                       C2->getValueAPF());
          return IC.replaceInstUsesWith(
              II, ConstantFP::get(IC.Builder.getContext(), Result));
        }
      }
    }

    break;
  }
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp: {
    const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
    // Guard against invalid arguments.
    int64_t CCVal = CC->getZExtValue();
    bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
                       CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
        (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
                        CCVal > CmpInst::LAST_FCMP_PREDICATE)))
      break;

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
      if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
        Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
        if (CCmp->isNullValue()) {
          return IC.replaceInstUsesWith(
              II, ConstantExpr::getSExt(CCmp, II.getType()));
        }

        // The result of V_ICMP/V_FCMP assembly instructions (which this
        // intrinsic exposes) is one bit per thread, masked with the EXEC
        // register (which contains the bitmask of live threads). So a
        // comparison that always returns true is the same as a read of the
        // EXEC register.
        Function *NewF = Intrinsic::getDeclaration(
            II.getModule(), Intrinsic::read_register, II.getType());
        Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
        MDNode *MD = MDNode::get(II.getContext(), MDArgs);
        Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
        CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
        NewCall->addAttribute(AttributeList::FunctionIndex,
                              Attribute::Convergent);
        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }

      // Canonicalize constants to RHS.
      CmpInst::Predicate SwapPred =
          CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
      II.setArgOperand(0, Src1);
      II.setArgOperand(1, Src0);
      II.setArgOperand(
          2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
      return &II;
    }

    if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
      break;

    // Canonicalize compare eq with true value to compare != 0
    // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
    //   -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
    // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
    //   -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
    Value *ExtSrc;
    if (CCVal == CmpInst::ICMP_EQ &&
        ((match(Src1, PatternMatch::m_One()) &&
          match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
         (match(Src1, PatternMatch::m_AllOnes()) &&
          match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
        ExtSrc->getType()->isIntegerTy(1)) {
      IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
      IC.replaceOperand(II, 2,
                        ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
      return &II;
    }

    CmpInst::Predicate SrcPred;
    Value *SrcLHS;
    Value *SrcRHS;

    // Fold compare eq/ne with 0 from a compare result as the predicate to the
    // intrinsic. The typical use is a wave vote function in the library, which
    // will be fed from a user code condition compared with 0. Fold in the
    // redundant compare.

    // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
    //   -> llvm.amdgcn.[if]cmp(a, b, pred)
    //
    // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
    //   -> llvm.amdgcn.[if]cmp(a, b, inv pred)
    if (match(Src1, PatternMatch::m_Zero()) &&
        match(Src0, PatternMatch::m_ZExtOrSExt(
                        m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
                              PatternMatch::m_Value(SrcRHS))))) {
      if (CCVal == CmpInst::ICMP_EQ)
        SrcPred = CmpInst::getInversePredicate(SrcPred);

      Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
                                 ? Intrinsic::amdgcn_fcmp
                                 : Intrinsic::amdgcn_icmp;

      Type *Ty = SrcLHS->getType();
      if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
        // Promote to next legal integer type.
        unsigned Width = CmpType->getBitWidth();
        unsigned NewWidth = Width;

        // Don't do anything for i1 comparisons.
        if (Width == 1)
          break;

        if (Width <= 16)
          NewWidth = 16;
        else if (Width <= 32)
          NewWidth = 32;
        else if (Width <= 64)
          NewWidth = 64;
        else if (Width > 64)
          break; // Can't handle this.
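
        // e.g. a hypothetical i8 comparison is sign- or zero-extended to i16
        // here, since the exposed V_CMP operations only come in 16/32/64-bit
        // widths.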
        if (Width != NewWidth) {
          IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
          if (CmpInst::isSigned(SrcPred)) {
            SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
            SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
          } else {
            SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
            SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
          }
        }
      } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
        break;

      Function *NewF = Intrinsic::getDeclaration(
          II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
      Value *Args[] = {SrcLHS, SrcRHS,
                       ConstantInt::get(CC->getType(), SrcPred)};
      CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
      NewCall->takeName(&II);
      return IC.replaceInstUsesWith(II, NewCall);
    }

    break;
  }
  case Intrinsic::amdgcn_ballot: {
    if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      if (Src->isZero()) {
        // amdgcn.ballot(i1 0) is zero.
        return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
      }

      if (Src->isOne()) {
        // amdgcn.ballot(i1 1) is exec.
        const char *RegName = "exec";
        if (II.getType()->isIntegerTy(32))
          RegName = "exec_lo";
        else if (!II.getType()->isIntegerTy(64))
          break;

        Function *NewF = Intrinsic::getDeclaration(
            II.getModule(), Intrinsic::read_register, II.getType());
        Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
        MDNode *MD = MDNode::get(II.getContext(), MDArgs);
        Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
        CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
        NewCall->addAttribute(AttributeList::FunctionIndex,
                              Attribute::Convergent);
        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }
    }
    break;
  }
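  // Illustrative IR for the ballot fold above (hypothetical wave64 module):
  //   %b = call i64 @llvm.amdgcn.ballot.i64(i1 true)
  // becomes a convergent read of the exec mask,
  //   %b = call i64 @llvm.read_register.i64(metadata !0)  ; !0 = !{!"exec"}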
  case Intrinsic::amdgcn_wqm_vote: {
    // wqm_vote is identity when the argument is constant.
    if (!isa<Constant>(II.getArgOperand(0)))
      break;

    return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  }
  case Intrinsic::amdgcn_kill: {
    const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
    if (!C || !C->getZExtValue())
      break;

    // amdgcn.kill(i1 1) is a no-op
    return IC.eraseInstFromFunction(II);
  }
  case Intrinsic::amdgcn_update_dpp: {
    Value *Old = II.getArgOperand(0);

    auto *BC = cast<ConstantInt>(II.getArgOperand(5));
    auto *RM = cast<ConstantInt>(II.getArgOperand(3));
    auto *BM = cast<ConstantInt>(II.getArgOperand(4));
    if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
        BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
      break;

    // If bound_ctrl = 1 and row mask = bank mask = 0xf, we can omit the old
    // value.
    return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
  }
  case Intrinsic::amdgcn_permlane16:
  case Intrinsic::amdgcn_permlanex16: {
    // Discard vdst_in if it's not going to be read.
    Value *VDstIn = II.getArgOperand(0);
    if (isa<UndefValue>(VDstIn))
      break;

    ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
    ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
    if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
      break;

    return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
  }
  case Intrinsic::amdgcn_readfirstlane:
  case Intrinsic::amdgcn_readlane: {
    // A constant value is trivially uniform.
    if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
      return IC.replaceInstUsesWith(II, C);
    }

    // The rest of these may not be safe if the exec may not be the same
    // between the def and use.
    Value *Src = II.getArgOperand(0);
    Instruction *SrcInst = dyn_cast<Instruction>(Src);
    if (SrcInst && SrcInst->getParent() != II.getParent())
      break;

    // readfirstlane (readfirstlane x) -> readfirstlane x
    // readlane (readfirstlane x), y -> readfirstlane x
    if (match(Src,
              PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
      return IC.replaceInstUsesWith(II, Src);
    }

    if (IID == Intrinsic::amdgcn_readfirstlane) {
      // readfirstlane (readlane x, y) -> readlane x, y
      if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
        return IC.replaceInstUsesWith(II, Src);
      }
    } else {
      // readlane (readlane x, y), y -> readlane x, y
      if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
                         PatternMatch::m_Value(),
                         PatternMatch::m_Specific(II.getArgOperand(1))))) {
        return IC.replaceInstUsesWith(II, Src);
      }
    }

    break;
  }
  case Intrinsic::amdgcn_ldexp: {
    // FIXME: This doesn't introduce new instructions and belongs in
    // InstructionSimplify.
    Type *Ty = II.getType();
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);

    // Folding undef to qnan is safe regardless of the FP mode.
    if (isa<UndefValue>(Op0)) {
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    const APFloat *C = nullptr;
    match(Op0, PatternMatch::m_APFloat(C));

    // FIXME: Should flush denorms depending on FP mode, but that's ignored
    // everywhere else.
    //
    // These cases should be safe, even with strictfp.
    // ldexp(0.0, x) -> 0.0
    // ldexp(-0.0, x) -> -0.0
    // ldexp(inf, x) -> inf
    // ldexp(-inf, x) -> -inf
    if (C && (C->isZero() || C->isInfinity())) {
      return IC.replaceInstUsesWith(II, Op0);
    }
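
    // e.g. llvm.amdgcn.ldexp.f32(float -0.0, i32 %n) folds to -0.0 above,
    // independent of %n (hypothetical operands).
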
    // With strictfp, be more careful about possibly needing to flush denormals
    // or not, and snan behavior depends on ieee_mode.
    if (II.isStrictFP())
      break;

    if (C && C->isNaN()) {
      // FIXME: We just need to make the nan quiet here, but that's unavailable
      // on APFloat, only IEEEfloat
      auto *Quieted =
          ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
      return IC.replaceInstUsesWith(II, Quieted);
    }

    // ldexp(x, 0) -> x
    // ldexp(x, undef) -> x
    if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
      return IC.replaceInstUsesWith(II, Op0);
    }

    break;
  }
  case Intrinsic::amdgcn_fmul_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);

    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
    // infinity, gives +0.0.
    // TODO: Move to InstSimplify?
    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
        match(Op1, PatternMatch::m_AnyZeroFP()))
      return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));

    // If we can prove we don't have one of the special cases then we can use a
    // normal fmul instruction instead.
    if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
      auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
      FMul->takeName(&II);
      return IC.replaceInstUsesWith(II, FMul);
    }
    break;
  }
  case Intrinsic::amdgcn_fma_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Op2 = II.getArgOperand(2);

    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
    // infinity, gives +0.0.
    // TODO: Move to InstSimplify?
    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
        match(Op1, PatternMatch::m_AnyZeroFP())) {
      // It's tempting to just return Op2 here, but that would give the wrong
      // result if Op2 was -0.0.
      auto *Zero = ConstantFP::getNullValue(II.getType());
      auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
      FAdd->takeName(&II);
      return IC.replaceInstUsesWith(II, FAdd);
    }
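
    // e.g. if Op0 is a nonzero finite constant such as 2.0 (hypothetical),
    // canSimplifyLegacyMulToMul holds and the call below is retargeted to
    // llvm.fma.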
    // If we can prove we don't have one of the special cases then we can use a
    // normal fma instead.
    if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
      II.setCalledOperand(Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::fma, II.getType()));
      return &II;
    }
    break;
  }
  default: {
    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
            AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
      return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
    }
  }
  }
  return None;
}

/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image
/// intrinsics.
///
/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
/// struct returns.
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                    IntrinsicInst &II,
                                                    APInt DemandedElts,
                                                    int DMaskIdx = -1) {

  auto *IIVTy = cast<FixedVectorType>(II.getType());
  unsigned VWidth = IIVTy->getNumElements();
  if (VWidth == 1)
    return nullptr;

  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
  IC.Builder.SetInsertPoint(&II);

  // Assume the arguments are unchanged and later override them, if needed.
  SmallVector<Value *, 16> Args(II.args());

  if (DMaskIdx < 0) {
    // Buffer case.

    const unsigned ActiveBits = DemandedElts.getActiveBits();
    const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();

    // Start assuming the prefix of elements is demanded, but possibly clear
    // some other bits if there are trailing zeros (unused components at front)
    // and update offset.
    DemandedElts = (1 << ActiveBits) - 1;

    if (UnusedComponentsAtFront > 0) {
      static const unsigned InvalidOffsetIdx = 0xf;

      unsigned OffsetIdx;
      switch (II.getIntrinsicID()) {
      case Intrinsic::amdgcn_raw_buffer_load:
        OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_s_buffer_load:
        // If resulting type is vec3, there is no point in trimming the
        // load with updated offset, as the vec3 would most likely be widened
        // to vec4 anyway during lowering.
        if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
          OffsetIdx = InvalidOffsetIdx;
        else
          OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_struct_buffer_load:
        OffsetIdx = 2;
        break;
      default:
        // TODO: handle tbuffer* intrinsics.
        OffsetIdx = InvalidOffsetIdx;
        break;
      }

      if (OffsetIdx != InvalidOffsetIdx) {
        // Clear demanded bits and update the offset.
        DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
        auto *Offset = II.getArgOperand(OffsetIdx);
        unsigned SingleComponentSizeInBits =
            IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
        unsigned OffsetAdd =
            UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
        auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
        Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
      }
    }
  } else {
    // Image case.

    ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;

    // Mask off values that are undefined because the dmask doesn't cover them
    DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;

    unsigned NewDMaskVal = 0;
    unsigned OrigLoadIdx = 0;
    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
      const unsigned Bit = 1 << SrcIdx;
      if (!!(DMaskVal & Bit)) {
        if (!!DemandedElts[OrigLoadIdx])
          NewDMaskVal |= Bit;
        OrigLoadIdx++;
      }
    }

    if (DMaskVal != NewDMaskVal)
      Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
  }

  unsigned NewNumElts = DemandedElts.countPopulation();
  if (!NewNumElts)
    return UndefValue::get(II.getType());

  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (DMaskIdx >= 0)
      II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
    return nullptr;
  }
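
  // Illustrative result (hypothetical IR): if only element 0 of
  //   %v = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(...)
  // is demanded, the call is narrowed to a single-float load below and the
  // scalar is reinserted at index 0 of an undef <4 x float>.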

  // Validate function argument and return types, extracting overloaded types
  // along the way.
  SmallVector<Type *, 6> OverloadTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
    return nullptr;

  Module *M = II.getParent()->getParent()->getParent();
  Type *EltTy = IIVTy->getElementType();
  Type *NewTy =
      (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);

  OverloadTys[0] = NewTy;
  Function *NewIntrin =
      Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);

  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
  NewCall->takeName(&II);
  NewCall->copyMetadata(II);

  if (NewNumElts == 1) {
    return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
                                          NewCall,
                                          DemandedElts.countTrailingZeros());
  }

  SmallVector<int, 8> EltMask;
  unsigned NewLoadIdx = 0;
  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
    if (!!DemandedElts[OrigLoadIdx])
      EltMask.push_back(NewLoadIdx++);
    else
      EltMask.push_back(NewNumElts);
  }

  Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);

  return Shuffle;
}

Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::amdgcn_buffer_load:
  case Intrinsic::amdgcn_buffer_load_format:
  case Intrinsic::amdgcn_raw_buffer_load:
  case Intrinsic::amdgcn_raw_buffer_load_format:
  case Intrinsic::amdgcn_raw_tbuffer_load:
  case Intrinsic::amdgcn_s_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load_format:
  case Intrinsic::amdgcn_struct_tbuffer_load:
  case Intrinsic::amdgcn_tbuffer_load:
    return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
  default: {
    if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
      return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
    }
    break;
  }
  }
  return None;
}