1e8d8bef9SDimitry Andric //===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric // \file 10e8d8bef9SDimitry Andric // This file implements a TargetTransformInfo analysis pass specific to the 11e8d8bef9SDimitry Andric // AMDGPU target machine. It uses the target's detailed information to provide 12e8d8bef9SDimitry Andric // more precise answers to certain TTI queries, while letting the target 13e8d8bef9SDimitry Andric // independent and default TTI implementations handle the rest. 
14e8d8bef9SDimitry Andric // 15e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 16e8d8bef9SDimitry Andric 17e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h" 18e8d8bef9SDimitry Andric #include "AMDGPUTargetTransformInfo.h" 19e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 20e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 21e8d8bef9SDimitry Andric #include "llvm/Transforms/InstCombine/InstCombiner.h" 22e8d8bef9SDimitry Andric 23e8d8bef9SDimitry Andric using namespace llvm; 24e8d8bef9SDimitry Andric 25e8d8bef9SDimitry Andric #define DEBUG_TYPE "AMDGPUtti" 26e8d8bef9SDimitry Andric 27e8d8bef9SDimitry Andric namespace { 28e8d8bef9SDimitry Andric 29e8d8bef9SDimitry Andric struct AMDGPUImageDMaskIntrinsic { 30e8d8bef9SDimitry Andric unsigned Intr; 31e8d8bef9SDimitry Andric }; 32e8d8bef9SDimitry Andric 33e8d8bef9SDimitry Andric #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL 34e8d8bef9SDimitry Andric #include "InstCombineTables.inc" 35e8d8bef9SDimitry Andric 36e8d8bef9SDimitry Andric } // end anonymous namespace 37e8d8bef9SDimitry Andric 38e8d8bef9SDimitry Andric // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs. 39e8d8bef9SDimitry Andric // 40e8d8bef9SDimitry Andric // A single NaN input is folded to minnum, so we rely on that folding for 41e8d8bef9SDimitry Andric // handling NaNs. 
42e8d8bef9SDimitry Andric static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, 43e8d8bef9SDimitry Andric const APFloat &Src2) { 44e8d8bef9SDimitry Andric APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2); 45e8d8bef9SDimitry Andric 46e8d8bef9SDimitry Andric APFloat::cmpResult Cmp0 = Max3.compare(Src0); 47e8d8bef9SDimitry Andric assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately"); 48e8d8bef9SDimitry Andric if (Cmp0 == APFloat::cmpEqual) 49e8d8bef9SDimitry Andric return maxnum(Src1, Src2); 50e8d8bef9SDimitry Andric 51e8d8bef9SDimitry Andric APFloat::cmpResult Cmp1 = Max3.compare(Src1); 52e8d8bef9SDimitry Andric assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately"); 53e8d8bef9SDimitry Andric if (Cmp1 == APFloat::cmpEqual) 54e8d8bef9SDimitry Andric return maxnum(Src0, Src2); 55e8d8bef9SDimitry Andric 56e8d8bef9SDimitry Andric return maxnum(Src0, Src1); 57e8d8bef9SDimitry Andric } 58e8d8bef9SDimitry Andric 59e8d8bef9SDimitry Andric // Check if a value can be converted to a 16-bit value without losing 60e8d8bef9SDimitry Andric // precision. 61*04eeddc0SDimitry Andric // The value is expected to be either a float (IsFloat = true) or an unsigned 62*04eeddc0SDimitry Andric // integer (IsFloat = false). 63*04eeddc0SDimitry Andric static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) { 64e8d8bef9SDimitry Andric Type *VTy = V.getType(); 65e8d8bef9SDimitry Andric if (VTy->isHalfTy() || VTy->isIntegerTy(16)) { 66e8d8bef9SDimitry Andric // The value is already 16-bit, so we don't want to convert to 16-bit again! 67e8d8bef9SDimitry Andric return false; 68e8d8bef9SDimitry Andric } 69*04eeddc0SDimitry Andric if (IsFloat) { 70e8d8bef9SDimitry Andric if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) { 71*04eeddc0SDimitry Andric // We need to check that if we cast the index down to a half, we do not 72*04eeddc0SDimitry Andric // lose precision. 
73e8d8bef9SDimitry Andric APFloat FloatValue(ConstFloat->getValueAPF()); 74e8d8bef9SDimitry Andric bool LosesInfo = true; 75*04eeddc0SDimitry Andric FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, 76*04eeddc0SDimitry Andric &LosesInfo); 77e8d8bef9SDimitry Andric return !LosesInfo; 78e8d8bef9SDimitry Andric } 79*04eeddc0SDimitry Andric } else { 80*04eeddc0SDimitry Andric if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) { 81*04eeddc0SDimitry Andric // We need to check that if we cast the index down to an i16, we do not 82*04eeddc0SDimitry Andric // lose precision. 83*04eeddc0SDimitry Andric APInt IntValue(ConstInt->getValue()); 84*04eeddc0SDimitry Andric return IntValue.getActiveBits() <= 16; 85*04eeddc0SDimitry Andric } 86*04eeddc0SDimitry Andric } 87*04eeddc0SDimitry Andric 88e8d8bef9SDimitry Andric Value *CastSrc; 89*04eeddc0SDimitry Andric bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) 90*04eeddc0SDimitry Andric : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc))); 91*04eeddc0SDimitry Andric if (IsExt) { 92e8d8bef9SDimitry Andric Type *CastSrcTy = CastSrc->getType(); 93e8d8bef9SDimitry Andric if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16)) 94e8d8bef9SDimitry Andric return true; 95e8d8bef9SDimitry Andric } 96e8d8bef9SDimitry Andric 97e8d8bef9SDimitry Andric return false; 98e8d8bef9SDimitry Andric } 99e8d8bef9SDimitry Andric 100e8d8bef9SDimitry Andric // Convert a value to 16-bit. 
101e8d8bef9SDimitry Andric static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) { 102e8d8bef9SDimitry Andric Type *VTy = V.getType(); 103e8d8bef9SDimitry Andric if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V)) 104e8d8bef9SDimitry Andric return cast<Instruction>(&V)->getOperand(0); 105e8d8bef9SDimitry Andric if (VTy->isIntegerTy()) 106e8d8bef9SDimitry Andric return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false); 107e8d8bef9SDimitry Andric if (VTy->isFloatingPointTy()) 108e8d8bef9SDimitry Andric return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext())); 109e8d8bef9SDimitry Andric 110e8d8bef9SDimitry Andric llvm_unreachable("Should never be called!"); 111e8d8bef9SDimitry Andric } 112e8d8bef9SDimitry Andric 113*04eeddc0SDimitry Andric /// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with 114*04eeddc0SDimitry Andric /// the modified arguments. 115*04eeddc0SDimitry Andric static Optional<Instruction *> modifyIntrinsicCall( 116*04eeddc0SDimitry Andric IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC, 117*04eeddc0SDimitry Andric std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)> 118*04eeddc0SDimitry Andric Func) { 119*04eeddc0SDimitry Andric SmallVector<Type *, 4> ArgTys; 120*04eeddc0SDimitry Andric if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys)) 121*04eeddc0SDimitry Andric return None; 122*04eeddc0SDimitry Andric 123*04eeddc0SDimitry Andric SmallVector<Value *, 8> Args(II.args()); 124*04eeddc0SDimitry Andric 125*04eeddc0SDimitry Andric // Modify arguments and types 126*04eeddc0SDimitry Andric Func(Args, ArgTys); 127*04eeddc0SDimitry Andric 128*04eeddc0SDimitry Andric Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys); 129*04eeddc0SDimitry Andric 130*04eeddc0SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall(I, Args); 131*04eeddc0SDimitry Andric NewCall->takeName(&II); 132*04eeddc0SDimitry Andric 
NewCall->copyMetadata(II); 133*04eeddc0SDimitry Andric if (isa<FPMathOperator>(NewCall)) 134*04eeddc0SDimitry Andric NewCall->copyFastMathFlags(&II); 135*04eeddc0SDimitry Andric 136*04eeddc0SDimitry Andric // Erase and replace uses 137*04eeddc0SDimitry Andric if (!II.getType()->isVoidTy()) 138*04eeddc0SDimitry Andric IC.replaceInstUsesWith(II, NewCall); 139*04eeddc0SDimitry Andric return IC.eraseInstFromFunction(II); 140*04eeddc0SDimitry Andric } 141*04eeddc0SDimitry Andric 142e8d8bef9SDimitry Andric static Optional<Instruction *> 143e8d8bef9SDimitry Andric simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, 144e8d8bef9SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, 145e8d8bef9SDimitry Andric IntrinsicInst &II, InstCombiner &IC) { 146*04eeddc0SDimitry Andric // Optimize _L to _LZ when _L is zero 147*04eeddc0SDimitry Andric if (const auto *LZMappingInfo = 148*04eeddc0SDimitry Andric AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) { 149*04eeddc0SDimitry Andric if (auto *ConstantLod = 150*04eeddc0SDimitry Andric dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) { 151*04eeddc0SDimitry Andric if (ConstantLod->isZero() || ConstantLod->isNegative()) { 152*04eeddc0SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = 153*04eeddc0SDimitry Andric AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ, 154*04eeddc0SDimitry Andric ImageDimIntr->Dim); 155*04eeddc0SDimitry Andric return modifyIntrinsicCall( 156*04eeddc0SDimitry Andric II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { 157*04eeddc0SDimitry Andric Args.erase(Args.begin() + ImageDimIntr->LodIndex); 158*04eeddc0SDimitry Andric }); 159*04eeddc0SDimitry Andric } 160*04eeddc0SDimitry Andric } 161*04eeddc0SDimitry Andric } 162*04eeddc0SDimitry Andric 163*04eeddc0SDimitry Andric // Optimize _mip away, when 'lod' is zero 164*04eeddc0SDimitry Andric if (const auto *MIPMappingInfo = 165*04eeddc0SDimitry Andric 
AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { 166*04eeddc0SDimitry Andric if (auto *ConstantMip = 167*04eeddc0SDimitry Andric dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) { 168*04eeddc0SDimitry Andric if (ConstantMip->isZero()) { 169*04eeddc0SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = 170*04eeddc0SDimitry Andric AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP, 171*04eeddc0SDimitry Andric ImageDimIntr->Dim); 172*04eeddc0SDimitry Andric return modifyIntrinsicCall( 173*04eeddc0SDimitry Andric II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { 174*04eeddc0SDimitry Andric Args.erase(Args.begin() + ImageDimIntr->MipIndex); 175*04eeddc0SDimitry Andric }); 176*04eeddc0SDimitry Andric } 177*04eeddc0SDimitry Andric } 178*04eeddc0SDimitry Andric } 179*04eeddc0SDimitry Andric 180*04eeddc0SDimitry Andric // Optimize _bias away when 'bias' is zero 181*04eeddc0SDimitry Andric if (const auto *BiasMappingInfo = 182*04eeddc0SDimitry Andric AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) { 183*04eeddc0SDimitry Andric if (auto *ConstantBias = 184*04eeddc0SDimitry Andric dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) { 185*04eeddc0SDimitry Andric if (ConstantBias->isZero()) { 186*04eeddc0SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = 187*04eeddc0SDimitry Andric AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias, 188*04eeddc0SDimitry Andric ImageDimIntr->Dim); 189*04eeddc0SDimitry Andric return modifyIntrinsicCall( 190*04eeddc0SDimitry Andric II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { 191*04eeddc0SDimitry Andric Args.erase(Args.begin() + ImageDimIntr->BiasIndex); 192*04eeddc0SDimitry Andric ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg); 193*04eeddc0SDimitry Andric }); 194*04eeddc0SDimitry Andric } 195*04eeddc0SDimitry Andric } 196*04eeddc0SDimitry Andric } 197*04eeddc0SDimitry Andric 198*04eeddc0SDimitry Andric 
// Optimize _offset away when 'offset' is zero 199*04eeddc0SDimitry Andric if (const auto *OffsetMappingInfo = 200*04eeddc0SDimitry Andric AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) { 201*04eeddc0SDimitry Andric if (auto *ConstantOffset = 202*04eeddc0SDimitry Andric dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) { 203*04eeddc0SDimitry Andric if (ConstantOffset->isZero()) { 204*04eeddc0SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = 205*04eeddc0SDimitry Andric AMDGPU::getImageDimIntrinsicByBaseOpcode( 206*04eeddc0SDimitry Andric OffsetMappingInfo->NoOffset, ImageDimIntr->Dim); 207*04eeddc0SDimitry Andric return modifyIntrinsicCall( 208*04eeddc0SDimitry Andric II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { 209*04eeddc0SDimitry Andric Args.erase(Args.begin() + ImageDimIntr->OffsetIndex); 210*04eeddc0SDimitry Andric }); 211*04eeddc0SDimitry Andric } 212*04eeddc0SDimitry Andric } 213*04eeddc0SDimitry Andric } 214*04eeddc0SDimitry Andric 215*04eeddc0SDimitry Andric // Try to use A16 or G16 216e8d8bef9SDimitry Andric if (!ST->hasA16() && !ST->hasG16()) 217e8d8bef9SDimitry Andric return None; 218e8d8bef9SDimitry Andric 219*04eeddc0SDimitry Andric // Address is interpreted as float if the instruction has a sampler or as 220*04eeddc0SDimitry Andric // unsigned int if there is no sampler. 
221*04eeddc0SDimitry Andric bool HasSampler = 222*04eeddc0SDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler; 223e8d8bef9SDimitry Andric bool FloatCoord = false; 224e8d8bef9SDimitry Andric // true means derivatives can be converted to 16 bit, coordinates not 225e8d8bef9SDimitry Andric bool OnlyDerivatives = false; 226e8d8bef9SDimitry Andric 227e8d8bef9SDimitry Andric for (unsigned OperandIndex = ImageDimIntr->GradientStart; 228e8d8bef9SDimitry Andric OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) { 229e8d8bef9SDimitry Andric Value *Coord = II.getOperand(OperandIndex); 230e8d8bef9SDimitry Andric // If the values are not derived from 16-bit values, we cannot optimize. 231*04eeddc0SDimitry Andric if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) { 232e8d8bef9SDimitry Andric if (OperandIndex < ImageDimIntr->CoordStart || 233e8d8bef9SDimitry Andric ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) { 234e8d8bef9SDimitry Andric return None; 235e8d8bef9SDimitry Andric } 236e8d8bef9SDimitry Andric // All gradients can be converted, so convert only them 237e8d8bef9SDimitry Andric OnlyDerivatives = true; 238e8d8bef9SDimitry Andric break; 239e8d8bef9SDimitry Andric } 240e8d8bef9SDimitry Andric 241e8d8bef9SDimitry Andric assert(OperandIndex == ImageDimIntr->GradientStart || 242e8d8bef9SDimitry Andric FloatCoord == Coord->getType()->isFloatingPointTy()); 243e8d8bef9SDimitry Andric FloatCoord = Coord->getType()->isFloatingPointTy(); 244e8d8bef9SDimitry Andric } 245e8d8bef9SDimitry Andric 246*04eeddc0SDimitry Andric if (!OnlyDerivatives && !ST->hasA16()) 247e8d8bef9SDimitry Andric OnlyDerivatives = true; // Only supports G16 248*04eeddc0SDimitry Andric 249*04eeddc0SDimitry Andric // Check if there is a bias parameter and if it can be converted to f16 250*04eeddc0SDimitry Andric if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { 251*04eeddc0SDimitry Andric Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); 252*04eeddc0SDimitry 
Andric assert(HasSampler && 253*04eeddc0SDimitry Andric "Only image instructions with a sampler can have a bias"); 254*04eeddc0SDimitry Andric if (!canSafelyConvertTo16Bit(*Bias, HasSampler)) 255*04eeddc0SDimitry Andric OnlyDerivatives = true; 256e8d8bef9SDimitry Andric } 257e8d8bef9SDimitry Andric 258*04eeddc0SDimitry Andric if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart == 259*04eeddc0SDimitry Andric ImageDimIntr->CoordStart)) 260*04eeddc0SDimitry Andric return None; 261*04eeddc0SDimitry Andric 262e8d8bef9SDimitry Andric Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext()) 263e8d8bef9SDimitry Andric : Type::getInt16Ty(II.getContext()); 264e8d8bef9SDimitry Andric 265*04eeddc0SDimitry Andric return modifyIntrinsicCall( 266*04eeddc0SDimitry Andric II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) { 267e8d8bef9SDimitry Andric ArgTys[ImageDimIntr->GradientTyArg] = CoordType; 268*04eeddc0SDimitry Andric if (!OnlyDerivatives) { 269e8d8bef9SDimitry Andric ArgTys[ImageDimIntr->CoordTyArg] = CoordType; 270e8d8bef9SDimitry Andric 271*04eeddc0SDimitry Andric // Change the bias type 272*04eeddc0SDimitry Andric if (ImageDimIntr->NumBiasArgs != 0) 273*04eeddc0SDimitry Andric ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext()); 274*04eeddc0SDimitry Andric } 275e8d8bef9SDimitry Andric 276e8d8bef9SDimitry Andric unsigned EndIndex = 277e8d8bef9SDimitry Andric OnlyDerivatives ? 
ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd; 278e8d8bef9SDimitry Andric for (unsigned OperandIndex = ImageDimIntr->GradientStart; 279e8d8bef9SDimitry Andric OperandIndex < EndIndex; OperandIndex++) { 280e8d8bef9SDimitry Andric Args[OperandIndex] = 281e8d8bef9SDimitry Andric convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder); 282e8d8bef9SDimitry Andric } 283e8d8bef9SDimitry Andric 284*04eeddc0SDimitry Andric // Convert the bias 285*04eeddc0SDimitry Andric if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { 286*04eeddc0SDimitry Andric Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); 287*04eeddc0SDimitry Andric Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder); 288*04eeddc0SDimitry Andric } 289*04eeddc0SDimitry Andric }); 290e8d8bef9SDimitry Andric } 291e8d8bef9SDimitry Andric 292e8d8bef9SDimitry Andric bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, 293e8d8bef9SDimitry Andric InstCombiner &IC) const { 294e8d8bef9SDimitry Andric // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or 295e8d8bef9SDimitry Andric // infinity, gives +0.0. If we can prove we don't have one of the special 296e8d8bef9SDimitry Andric // cases then we can use a normal multiply instead. 297e8d8bef9SDimitry Andric // TODO: Create and use isKnownFiniteNonZero instead of just matching 298e8d8bef9SDimitry Andric // constants here. 299e8d8bef9SDimitry Andric if (match(Op0, PatternMatch::m_FiniteNonZero()) || 300e8d8bef9SDimitry Andric match(Op1, PatternMatch::m_FiniteNonZero())) { 301e8d8bef9SDimitry Andric // One operand is not zero or infinity or NaN. 
302e8d8bef9SDimitry Andric return true; 303e8d8bef9SDimitry Andric } 304e8d8bef9SDimitry Andric auto *TLI = &IC.getTargetLibraryInfo(); 305e8d8bef9SDimitry Andric if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) && 306e8d8bef9SDimitry Andric isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) { 307e8d8bef9SDimitry Andric // Neither operand is infinity or NaN. 308e8d8bef9SDimitry Andric return true; 309e8d8bef9SDimitry Andric } 310e8d8bef9SDimitry Andric return false; 311e8d8bef9SDimitry Andric } 312e8d8bef9SDimitry Andric 313e8d8bef9SDimitry Andric Optional<Instruction *> 314e8d8bef9SDimitry Andric GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { 315e8d8bef9SDimitry Andric Intrinsic::ID IID = II.getIntrinsicID(); 316e8d8bef9SDimitry Andric switch (IID) { 317e8d8bef9SDimitry Andric case Intrinsic::amdgcn_rcp: { 318e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 319e8d8bef9SDimitry Andric 320e8d8bef9SDimitry Andric // TODO: Move to ConstantFolding/InstSimplify? 321e8d8bef9SDimitry Andric if (isa<UndefValue>(Src)) { 322e8d8bef9SDimitry Andric Type *Ty = II.getType(); 323e8d8bef9SDimitry Andric auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); 324e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, QNaN); 325e8d8bef9SDimitry Andric } 326e8d8bef9SDimitry Andric 327e8d8bef9SDimitry Andric if (II.isStrictFP()) 328e8d8bef9SDimitry Andric break; 329e8d8bef9SDimitry Andric 330e8d8bef9SDimitry Andric if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) { 331e8d8bef9SDimitry Andric const APFloat &ArgVal = C->getValueAPF(); 332e8d8bef9SDimitry Andric APFloat Val(ArgVal.getSemantics(), 1); 333e8d8bef9SDimitry Andric Val.divide(ArgVal, APFloat::rmNearestTiesToEven); 334e8d8bef9SDimitry Andric 335e8d8bef9SDimitry Andric // This is more precise than the instruction may give. 
336e8d8bef9SDimitry Andric // 337e8d8bef9SDimitry Andric // TODO: The instruction always flushes denormal results (except for f16), 338e8d8bef9SDimitry Andric // should this also? 339e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val)); 340e8d8bef9SDimitry Andric } 341e8d8bef9SDimitry Andric 342e8d8bef9SDimitry Andric break; 343e8d8bef9SDimitry Andric } 344e8d8bef9SDimitry Andric case Intrinsic::amdgcn_rsq: { 345e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 346e8d8bef9SDimitry Andric 347e8d8bef9SDimitry Andric // TODO: Move to ConstantFolding/InstSimplify? 348e8d8bef9SDimitry Andric if (isa<UndefValue>(Src)) { 349e8d8bef9SDimitry Andric Type *Ty = II.getType(); 350e8d8bef9SDimitry Andric auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); 351e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, QNaN); 352e8d8bef9SDimitry Andric } 353e8d8bef9SDimitry Andric 354e8d8bef9SDimitry Andric break; 355e8d8bef9SDimitry Andric } 356e8d8bef9SDimitry Andric case Intrinsic::amdgcn_frexp_mant: 357e8d8bef9SDimitry Andric case Intrinsic::amdgcn_frexp_exp: { 358e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 359e8d8bef9SDimitry Andric if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) { 360e8d8bef9SDimitry Andric int Exp; 361e8d8bef9SDimitry Andric APFloat Significand = 362e8d8bef9SDimitry Andric frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven); 363e8d8bef9SDimitry Andric 364e8d8bef9SDimitry Andric if (IID == Intrinsic::amdgcn_frexp_mant) { 365e8d8bef9SDimitry Andric return IC.replaceInstUsesWith( 366e8d8bef9SDimitry Andric II, ConstantFP::get(II.getContext(), Significand)); 367e8d8bef9SDimitry Andric } 368e8d8bef9SDimitry Andric 369e8d8bef9SDimitry Andric // Match instruction special case behavior. 
370e8d8bef9SDimitry Andric if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf) 371e8d8bef9SDimitry Andric Exp = 0; 372e8d8bef9SDimitry Andric 373e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp)); 374e8d8bef9SDimitry Andric } 375e8d8bef9SDimitry Andric 376e8d8bef9SDimitry Andric if (isa<UndefValue>(Src)) { 377e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 378e8d8bef9SDimitry Andric } 379e8d8bef9SDimitry Andric 380e8d8bef9SDimitry Andric break; 381e8d8bef9SDimitry Andric } 382e8d8bef9SDimitry Andric case Intrinsic::amdgcn_class: { 383e8d8bef9SDimitry Andric enum { 384e8d8bef9SDimitry Andric S_NAN = 1 << 0, // Signaling NaN 385e8d8bef9SDimitry Andric Q_NAN = 1 << 1, // Quiet NaN 386e8d8bef9SDimitry Andric N_INFINITY = 1 << 2, // Negative infinity 387e8d8bef9SDimitry Andric N_NORMAL = 1 << 3, // Negative normal 388e8d8bef9SDimitry Andric N_SUBNORMAL = 1 << 4, // Negative subnormal 389e8d8bef9SDimitry Andric N_ZERO = 1 << 5, // Negative zero 390e8d8bef9SDimitry Andric P_ZERO = 1 << 6, // Positive zero 391e8d8bef9SDimitry Andric P_SUBNORMAL = 1 << 7, // Positive subnormal 392e8d8bef9SDimitry Andric P_NORMAL = 1 << 8, // Positive normal 393e8d8bef9SDimitry Andric P_INFINITY = 1 << 9 // Positive infinity 394e8d8bef9SDimitry Andric }; 395e8d8bef9SDimitry Andric 396e8d8bef9SDimitry Andric const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL | 397e8d8bef9SDimitry Andric N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | 398e8d8bef9SDimitry Andric P_NORMAL | P_INFINITY; 399e8d8bef9SDimitry Andric 400e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 401e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 402e8d8bef9SDimitry Andric const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1); 403e8d8bef9SDimitry Andric if (!CMask) { 404e8d8bef9SDimitry Andric if (isa<UndefValue>(Src0)) { 405e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 
UndefValue::get(II.getType())); 406e8d8bef9SDimitry Andric } 407e8d8bef9SDimitry Andric 408e8d8bef9SDimitry Andric if (isa<UndefValue>(Src1)) { 409e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 410e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), false)); 411e8d8bef9SDimitry Andric } 412e8d8bef9SDimitry Andric break; 413e8d8bef9SDimitry Andric } 414e8d8bef9SDimitry Andric 415e8d8bef9SDimitry Andric uint32_t Mask = CMask->getZExtValue(); 416e8d8bef9SDimitry Andric 417e8d8bef9SDimitry Andric // If all tests are made, it doesn't matter what the value is. 418e8d8bef9SDimitry Andric if ((Mask & FullMask) == FullMask) { 419e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true)); 420e8d8bef9SDimitry Andric } 421e8d8bef9SDimitry Andric 422e8d8bef9SDimitry Andric if ((Mask & FullMask) == 0) { 423e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false)); 424e8d8bef9SDimitry Andric } 425e8d8bef9SDimitry Andric 426e8d8bef9SDimitry Andric if (Mask == (S_NAN | Q_NAN)) { 427e8d8bef9SDimitry Andric // Equivalent of isnan. Replace with standard fcmp. 428e8d8bef9SDimitry Andric Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0); 429e8d8bef9SDimitry Andric FCmp->takeName(&II); 430e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, FCmp); 431e8d8bef9SDimitry Andric } 432e8d8bef9SDimitry Andric 433e8d8bef9SDimitry Andric if (Mask == (N_ZERO | P_ZERO)) { 434e8d8bef9SDimitry Andric // Equivalent of == 0. 
435e8d8bef9SDimitry Andric Value *FCmp = 436e8d8bef9SDimitry Andric IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0)); 437e8d8bef9SDimitry Andric 438e8d8bef9SDimitry Andric FCmp->takeName(&II); 439e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, FCmp); 440e8d8bef9SDimitry Andric } 441e8d8bef9SDimitry Andric 442e8d8bef9SDimitry Andric // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other 443e8d8bef9SDimitry Andric if (((Mask & S_NAN) || (Mask & Q_NAN)) && 444e8d8bef9SDimitry Andric isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) { 445e8d8bef9SDimitry Andric return IC.replaceOperand( 446e8d8bef9SDimitry Andric II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN))); 447e8d8bef9SDimitry Andric } 448e8d8bef9SDimitry Andric 449e8d8bef9SDimitry Andric const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0); 450e8d8bef9SDimitry Andric if (!CVal) { 451e8d8bef9SDimitry Andric if (isa<UndefValue>(Src0)) { 452e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 453e8d8bef9SDimitry Andric } 454e8d8bef9SDimitry Andric 455e8d8bef9SDimitry Andric // Clamp mask to used bits 456e8d8bef9SDimitry Andric if ((Mask & FullMask) != Mask) { 457e8d8bef9SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall( 458e8d8bef9SDimitry Andric II.getCalledFunction(), 459e8d8bef9SDimitry Andric {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)}); 460e8d8bef9SDimitry Andric 461e8d8bef9SDimitry Andric NewCall->takeName(&II); 462e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, NewCall); 463e8d8bef9SDimitry Andric } 464e8d8bef9SDimitry Andric 465e8d8bef9SDimitry Andric break; 466e8d8bef9SDimitry Andric } 467e8d8bef9SDimitry Andric 468e8d8bef9SDimitry Andric const APFloat &Val = CVal->getValueAPF(); 469e8d8bef9SDimitry Andric 470e8d8bef9SDimitry Andric bool Result = 471e8d8bef9SDimitry Andric ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) || 472e8d8bef9SDimitry Andric ((Mask & Q_NAN) && 
Val.isNaN() && !Val.isSignaling()) || 473e8d8bef9SDimitry Andric ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) || 474e8d8bef9SDimitry Andric ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) || 475e8d8bef9SDimitry Andric ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) || 476e8d8bef9SDimitry Andric ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) || 477e8d8bef9SDimitry Andric ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) || 478e8d8bef9SDimitry Andric ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) || 479e8d8bef9SDimitry Andric ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) || 480e8d8bef9SDimitry Andric ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative()); 481e8d8bef9SDimitry Andric 482e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result)); 483e8d8bef9SDimitry Andric } 484e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pkrtz: { 485e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 486e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 487e8d8bef9SDimitry Andric if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) { 488e8d8bef9SDimitry Andric if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) { 489e8d8bef9SDimitry Andric const fltSemantics &HalfSem = 490e8d8bef9SDimitry Andric II.getType()->getScalarType()->getFltSemantics(); 491e8d8bef9SDimitry Andric bool LosesInfo; 492e8d8bef9SDimitry Andric APFloat Val0 = C0->getValueAPF(); 493e8d8bef9SDimitry Andric APFloat Val1 = C1->getValueAPF(); 494e8d8bef9SDimitry Andric Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo); 495e8d8bef9SDimitry Andric Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo); 496e8d8bef9SDimitry Andric 497e8d8bef9SDimitry Andric Constant *Folded = 498e8d8bef9SDimitry Andric ConstantVector::get({ConstantFP::get(II.getContext(), Val0), 499e8d8bef9SDimitry Andric ConstantFP::get(II.getContext(), Val1)}); 500e8d8bef9SDimitry Andric return 
IC.replaceInstUsesWith(II, Folded); 501e8d8bef9SDimitry Andric } 502e8d8bef9SDimitry Andric } 503e8d8bef9SDimitry Andric 504e8d8bef9SDimitry Andric if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) { 505e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 506e8d8bef9SDimitry Andric } 507e8d8bef9SDimitry Andric 508e8d8bef9SDimitry Andric break; 509e8d8bef9SDimitry Andric } 510e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pknorm_i16: 511e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pknorm_u16: 512e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pk_i16: 513e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pk_u16: { 514e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 515e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 516e8d8bef9SDimitry Andric 517e8d8bef9SDimitry Andric if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) { 518e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 519e8d8bef9SDimitry Andric } 520e8d8bef9SDimitry Andric 521e8d8bef9SDimitry Andric break; 522e8d8bef9SDimitry Andric } 523e8d8bef9SDimitry Andric case Intrinsic::amdgcn_ubfe: 524e8d8bef9SDimitry Andric case Intrinsic::amdgcn_sbfe: { 525e8d8bef9SDimitry Andric // Decompose simple cases into standard shifts. 
526e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 527e8d8bef9SDimitry Andric if (isa<UndefValue>(Src)) { 528e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Src); 529e8d8bef9SDimitry Andric } 530e8d8bef9SDimitry Andric 531e8d8bef9SDimitry Andric unsigned Width; 532e8d8bef9SDimitry Andric Type *Ty = II.getType(); 533e8d8bef9SDimitry Andric unsigned IntSize = Ty->getIntegerBitWidth(); 534e8d8bef9SDimitry Andric 535e8d8bef9SDimitry Andric ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2)); 536e8d8bef9SDimitry Andric if (CWidth) { 537e8d8bef9SDimitry Andric Width = CWidth->getZExtValue(); 538e8d8bef9SDimitry Andric if ((Width & (IntSize - 1)) == 0) { 539e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty)); 540e8d8bef9SDimitry Andric } 541e8d8bef9SDimitry Andric 542e8d8bef9SDimitry Andric // Hardware ignores high bits, so remove those. 543e8d8bef9SDimitry Andric if (Width >= IntSize) { 544e8d8bef9SDimitry Andric return IC.replaceOperand( 545e8d8bef9SDimitry Andric II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1))); 546e8d8bef9SDimitry Andric } 547e8d8bef9SDimitry Andric } 548e8d8bef9SDimitry Andric 549e8d8bef9SDimitry Andric unsigned Offset; 550e8d8bef9SDimitry Andric ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1)); 551e8d8bef9SDimitry Andric if (COffset) { 552e8d8bef9SDimitry Andric Offset = COffset->getZExtValue(); 553e8d8bef9SDimitry Andric if (Offset >= IntSize) { 554e8d8bef9SDimitry Andric return IC.replaceOperand( 555e8d8bef9SDimitry Andric II, 1, 556e8d8bef9SDimitry Andric ConstantInt::get(COffset->getType(), Offset & (IntSize - 1))); 557e8d8bef9SDimitry Andric } 558e8d8bef9SDimitry Andric } 559e8d8bef9SDimitry Andric 560e8d8bef9SDimitry Andric bool Signed = IID == Intrinsic::amdgcn_sbfe; 561e8d8bef9SDimitry Andric 562e8d8bef9SDimitry Andric if (!CWidth || !COffset) 563e8d8bef9SDimitry Andric break; 564e8d8bef9SDimitry Andric 565349cc55cSDimitry Andric // The 
case of Width == 0 is handled above, which makes this transformation 566e8d8bef9SDimitry Andric // safe. If Width == 0, then the ashr and lshr instructions become poison 567e8d8bef9SDimitry Andric // value since the shift amount would be equal to the bit size. 568e8d8bef9SDimitry Andric assert(Width != 0); 569e8d8bef9SDimitry Andric 570e8d8bef9SDimitry Andric // TODO: This allows folding to undef when the hardware has specific 571e8d8bef9SDimitry Andric // behavior? 572e8d8bef9SDimitry Andric if (Offset + Width < IntSize) { 573e8d8bef9SDimitry Andric Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width); 574e8d8bef9SDimitry Andric Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width) 575e8d8bef9SDimitry Andric : IC.Builder.CreateLShr(Shl, IntSize - Width); 576e8d8bef9SDimitry Andric RightShift->takeName(&II); 577e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, RightShift); 578e8d8bef9SDimitry Andric } 579e8d8bef9SDimitry Andric 580e8d8bef9SDimitry Andric Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset) 581e8d8bef9SDimitry Andric : IC.Builder.CreateLShr(Src, Offset); 582e8d8bef9SDimitry Andric 583e8d8bef9SDimitry Andric RightShift->takeName(&II); 584e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, RightShift); 585e8d8bef9SDimitry Andric } 586e8d8bef9SDimitry Andric case Intrinsic::amdgcn_exp: 587e8d8bef9SDimitry Andric case Intrinsic::amdgcn_exp_compr: { 588e8d8bef9SDimitry Andric ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1)); 589e8d8bef9SDimitry Andric unsigned EnBits = En->getZExtValue(); 590e8d8bef9SDimitry Andric if (EnBits == 0xf) 591e8d8bef9SDimitry Andric break; // All inputs enabled. 592e8d8bef9SDimitry Andric 593e8d8bef9SDimitry Andric bool IsCompr = IID == Intrinsic::amdgcn_exp_compr; 594e8d8bef9SDimitry Andric bool Changed = false; 595e8d8bef9SDimitry Andric for (int I = 0; I < (IsCompr ? 
2 : 4); ++I) { 596e8d8bef9SDimitry Andric if ((!IsCompr && (EnBits & (1 << I)) == 0) || 597e8d8bef9SDimitry Andric (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) { 598e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(I + 2); 599e8d8bef9SDimitry Andric if (!isa<UndefValue>(Src)) { 600e8d8bef9SDimitry Andric IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType())); 601e8d8bef9SDimitry Andric Changed = true; 602e8d8bef9SDimitry Andric } 603e8d8bef9SDimitry Andric } 604e8d8bef9SDimitry Andric } 605e8d8bef9SDimitry Andric 606e8d8bef9SDimitry Andric if (Changed) { 607e8d8bef9SDimitry Andric return &II; 608e8d8bef9SDimitry Andric } 609e8d8bef9SDimitry Andric 610e8d8bef9SDimitry Andric break; 611e8d8bef9SDimitry Andric } 612e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fmed3: { 613e8d8bef9SDimitry Andric // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled 614e8d8bef9SDimitry Andric // for the shader. 615e8d8bef9SDimitry Andric 616e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 617e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 618e8d8bef9SDimitry Andric Value *Src2 = II.getArgOperand(2); 619e8d8bef9SDimitry Andric 620e8d8bef9SDimitry Andric // Checking for NaN before canonicalization provides better fidelity when 621e8d8bef9SDimitry Andric // mapping other operations onto fmed3 since the order of operands is 622e8d8bef9SDimitry Andric // unchanged. 
623e8d8bef9SDimitry Andric CallInst *NewCall = nullptr; 624e8d8bef9SDimitry Andric if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) { 625e8d8bef9SDimitry Andric NewCall = IC.Builder.CreateMinNum(Src1, Src2); 626e8d8bef9SDimitry Andric } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) { 627e8d8bef9SDimitry Andric NewCall = IC.Builder.CreateMinNum(Src0, Src2); 628e8d8bef9SDimitry Andric } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) { 629e8d8bef9SDimitry Andric NewCall = IC.Builder.CreateMaxNum(Src0, Src1); 630e8d8bef9SDimitry Andric } 631e8d8bef9SDimitry Andric 632e8d8bef9SDimitry Andric if (NewCall) { 633e8d8bef9SDimitry Andric NewCall->copyFastMathFlags(&II); 634e8d8bef9SDimitry Andric NewCall->takeName(&II); 635e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, NewCall); 636e8d8bef9SDimitry Andric } 637e8d8bef9SDimitry Andric 638e8d8bef9SDimitry Andric bool Swap = false; 639e8d8bef9SDimitry Andric // Canonicalize constants to RHS operands. 
640e8d8bef9SDimitry Andric // 641e8d8bef9SDimitry Andric // fmed3(c0, x, c1) -> fmed3(x, c0, c1) 642e8d8bef9SDimitry Andric if (isa<Constant>(Src0) && !isa<Constant>(Src1)) { 643e8d8bef9SDimitry Andric std::swap(Src0, Src1); 644e8d8bef9SDimitry Andric Swap = true; 645e8d8bef9SDimitry Andric } 646e8d8bef9SDimitry Andric 647e8d8bef9SDimitry Andric if (isa<Constant>(Src1) && !isa<Constant>(Src2)) { 648e8d8bef9SDimitry Andric std::swap(Src1, Src2); 649e8d8bef9SDimitry Andric Swap = true; 650e8d8bef9SDimitry Andric } 651e8d8bef9SDimitry Andric 652e8d8bef9SDimitry Andric if (isa<Constant>(Src0) && !isa<Constant>(Src1)) { 653e8d8bef9SDimitry Andric std::swap(Src0, Src1); 654e8d8bef9SDimitry Andric Swap = true; 655e8d8bef9SDimitry Andric } 656e8d8bef9SDimitry Andric 657e8d8bef9SDimitry Andric if (Swap) { 658e8d8bef9SDimitry Andric II.setArgOperand(0, Src0); 659e8d8bef9SDimitry Andric II.setArgOperand(1, Src1); 660e8d8bef9SDimitry Andric II.setArgOperand(2, Src2); 661e8d8bef9SDimitry Andric return &II; 662e8d8bef9SDimitry Andric } 663e8d8bef9SDimitry Andric 664e8d8bef9SDimitry Andric if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) { 665e8d8bef9SDimitry Andric if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) { 666e8d8bef9SDimitry Andric if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) { 667e8d8bef9SDimitry Andric APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), 668e8d8bef9SDimitry Andric C2->getValueAPF()); 669e8d8bef9SDimitry Andric return IC.replaceInstUsesWith( 670e8d8bef9SDimitry Andric II, ConstantFP::get(IC.Builder.getContext(), Result)); 671e8d8bef9SDimitry Andric } 672e8d8bef9SDimitry Andric } 673e8d8bef9SDimitry Andric } 674e8d8bef9SDimitry Andric 675e8d8bef9SDimitry Andric break; 676e8d8bef9SDimitry Andric } 677e8d8bef9SDimitry Andric case Intrinsic::amdgcn_icmp: 678e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fcmp: { 679e8d8bef9SDimitry Andric const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2)); 680e8d8bef9SDimitry 
Andric // Guard against invalid arguments. 681e8d8bef9SDimitry Andric int64_t CCVal = CC->getZExtValue(); 682e8d8bef9SDimitry Andric bool IsInteger = IID == Intrinsic::amdgcn_icmp; 683e8d8bef9SDimitry Andric if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE || 684e8d8bef9SDimitry Andric CCVal > CmpInst::LAST_ICMP_PREDICATE)) || 685e8d8bef9SDimitry Andric (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE || 686e8d8bef9SDimitry Andric CCVal > CmpInst::LAST_FCMP_PREDICATE))) 687e8d8bef9SDimitry Andric break; 688e8d8bef9SDimitry Andric 689e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 690e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 691e8d8bef9SDimitry Andric 692e8d8bef9SDimitry Andric if (auto *CSrc0 = dyn_cast<Constant>(Src0)) { 693e8d8bef9SDimitry Andric if (auto *CSrc1 = dyn_cast<Constant>(Src1)) { 694e8d8bef9SDimitry Andric Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1); 695e8d8bef9SDimitry Andric if (CCmp->isNullValue()) { 696e8d8bef9SDimitry Andric return IC.replaceInstUsesWith( 697e8d8bef9SDimitry Andric II, ConstantExpr::getSExt(CCmp, II.getType())); 698e8d8bef9SDimitry Andric } 699e8d8bef9SDimitry Andric 700e8d8bef9SDimitry Andric // The result of V_ICMP/V_FCMP assembly instructions (which this 701e8d8bef9SDimitry Andric // intrinsic exposes) is one bit per thread, masked with the EXEC 702e8d8bef9SDimitry Andric // register (which contains the bitmask of live threads). So a 703e8d8bef9SDimitry Andric // comparison that always returns true is the same as a read of the 704e8d8bef9SDimitry Andric // EXEC register. 
705e8d8bef9SDimitry Andric Function *NewF = Intrinsic::getDeclaration( 706e8d8bef9SDimitry Andric II.getModule(), Intrinsic::read_register, II.getType()); 707e8d8bef9SDimitry Andric Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")}; 708e8d8bef9SDimitry Andric MDNode *MD = MDNode::get(II.getContext(), MDArgs); 709e8d8bef9SDimitry Andric Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)}; 710e8d8bef9SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall(NewF, Args); 711349cc55cSDimitry Andric NewCall->addFnAttr(Attribute::Convergent); 712e8d8bef9SDimitry Andric NewCall->takeName(&II); 713e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, NewCall); 714e8d8bef9SDimitry Andric } 715e8d8bef9SDimitry Andric 716e8d8bef9SDimitry Andric // Canonicalize constants to RHS. 717e8d8bef9SDimitry Andric CmpInst::Predicate SwapPred = 718e8d8bef9SDimitry Andric CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal)); 719e8d8bef9SDimitry Andric II.setArgOperand(0, Src1); 720e8d8bef9SDimitry Andric II.setArgOperand(1, Src0); 721e8d8bef9SDimitry Andric II.setArgOperand( 722e8d8bef9SDimitry Andric 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred))); 723e8d8bef9SDimitry Andric return &II; 724e8d8bef9SDimitry Andric } 725e8d8bef9SDimitry Andric 726e8d8bef9SDimitry Andric if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE) 727e8d8bef9SDimitry Andric break; 728e8d8bef9SDimitry Andric 729e8d8bef9SDimitry Andric // Canonicalize compare eq with true value to compare != 0 730e8d8bef9SDimitry Andric // llvm.amdgcn.icmp(zext (i1 x), 1, eq) 731e8d8bef9SDimitry Andric // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne) 732e8d8bef9SDimitry Andric // llvm.amdgcn.icmp(sext (i1 x), -1, eq) 733e8d8bef9SDimitry Andric // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne) 734e8d8bef9SDimitry Andric Value *ExtSrc; 735e8d8bef9SDimitry Andric if (CCVal == CmpInst::ICMP_EQ && 736e8d8bef9SDimitry Andric ((match(Src1, PatternMatch::m_One()) && 737e8d8bef9SDimitry 
Andric match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) || 738e8d8bef9SDimitry Andric (match(Src1, PatternMatch::m_AllOnes()) && 739e8d8bef9SDimitry Andric match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) && 740e8d8bef9SDimitry Andric ExtSrc->getType()->isIntegerTy(1)) { 741e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType())); 742e8d8bef9SDimitry Andric IC.replaceOperand(II, 2, 743e8d8bef9SDimitry Andric ConstantInt::get(CC->getType(), CmpInst::ICMP_NE)); 744e8d8bef9SDimitry Andric return &II; 745e8d8bef9SDimitry Andric } 746e8d8bef9SDimitry Andric 747e8d8bef9SDimitry Andric CmpInst::Predicate SrcPred; 748e8d8bef9SDimitry Andric Value *SrcLHS; 749e8d8bef9SDimitry Andric Value *SrcRHS; 750e8d8bef9SDimitry Andric 751e8d8bef9SDimitry Andric // Fold compare eq/ne with 0 from a compare result as the predicate to the 752e8d8bef9SDimitry Andric // intrinsic. The typical use is a wave vote function in the library, which 753e8d8bef9SDimitry Andric // will be fed from a user code condition compared with 0. Fold in the 754e8d8bef9SDimitry Andric // redundant compare. 
755e8d8bef9SDimitry Andric 756e8d8bef9SDimitry Andric // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne) 757e8d8bef9SDimitry Andric // -> llvm.amdgcn.[if]cmp(a, b, pred) 758e8d8bef9SDimitry Andric // 759e8d8bef9SDimitry Andric // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq) 760e8d8bef9SDimitry Andric // -> llvm.amdgcn.[if]cmp(a, b, inv pred) 761e8d8bef9SDimitry Andric if (match(Src1, PatternMatch::m_Zero()) && 762e8d8bef9SDimitry Andric match(Src0, PatternMatch::m_ZExtOrSExt( 763e8d8bef9SDimitry Andric m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS), 764e8d8bef9SDimitry Andric PatternMatch::m_Value(SrcRHS))))) { 765e8d8bef9SDimitry Andric if (CCVal == CmpInst::ICMP_EQ) 766e8d8bef9SDimitry Andric SrcPred = CmpInst::getInversePredicate(SrcPred); 767e8d8bef9SDimitry Andric 768e8d8bef9SDimitry Andric Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) 769e8d8bef9SDimitry Andric ? Intrinsic::amdgcn_fcmp 770e8d8bef9SDimitry Andric : Intrinsic::amdgcn_icmp; 771e8d8bef9SDimitry Andric 772e8d8bef9SDimitry Andric Type *Ty = SrcLHS->getType(); 773e8d8bef9SDimitry Andric if (auto *CmpType = dyn_cast<IntegerType>(Ty)) { 774e8d8bef9SDimitry Andric // Promote to next legal integer type. 775e8d8bef9SDimitry Andric unsigned Width = CmpType->getBitWidth(); 776e8d8bef9SDimitry Andric unsigned NewWidth = Width; 777e8d8bef9SDimitry Andric 778e8d8bef9SDimitry Andric // Don't do anything for i1 comparisons. 779e8d8bef9SDimitry Andric if (Width == 1) 780e8d8bef9SDimitry Andric break; 781e8d8bef9SDimitry Andric 782e8d8bef9SDimitry Andric if (Width <= 16) 783e8d8bef9SDimitry Andric NewWidth = 16; 784e8d8bef9SDimitry Andric else if (Width <= 32) 785e8d8bef9SDimitry Andric NewWidth = 32; 786e8d8bef9SDimitry Andric else if (Width <= 64) 787e8d8bef9SDimitry Andric NewWidth = 64; 788e8d8bef9SDimitry Andric else if (Width > 64) 789e8d8bef9SDimitry Andric break; // Can't handle this. 
790e8d8bef9SDimitry Andric 791e8d8bef9SDimitry Andric if (Width != NewWidth) { 792e8d8bef9SDimitry Andric IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth); 793e8d8bef9SDimitry Andric if (CmpInst::isSigned(SrcPred)) { 794e8d8bef9SDimitry Andric SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy); 795e8d8bef9SDimitry Andric SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy); 796e8d8bef9SDimitry Andric } else { 797e8d8bef9SDimitry Andric SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy); 798e8d8bef9SDimitry Andric SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy); 799e8d8bef9SDimitry Andric } 800e8d8bef9SDimitry Andric } 801e8d8bef9SDimitry Andric } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy()) 802e8d8bef9SDimitry Andric break; 803e8d8bef9SDimitry Andric 804e8d8bef9SDimitry Andric Function *NewF = Intrinsic::getDeclaration( 805e8d8bef9SDimitry Andric II.getModule(), NewIID, {II.getType(), SrcLHS->getType()}); 806e8d8bef9SDimitry Andric Value *Args[] = {SrcLHS, SrcRHS, 807e8d8bef9SDimitry Andric ConstantInt::get(CC->getType(), SrcPred)}; 808e8d8bef9SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall(NewF, Args); 809e8d8bef9SDimitry Andric NewCall->takeName(&II); 810e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, NewCall); 811e8d8bef9SDimitry Andric } 812e8d8bef9SDimitry Andric 813e8d8bef9SDimitry Andric break; 814e8d8bef9SDimitry Andric } 815e8d8bef9SDimitry Andric case Intrinsic::amdgcn_ballot: { 816e8d8bef9SDimitry Andric if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 817e8d8bef9SDimitry Andric if (Src->isZero()) { 818e8d8bef9SDimitry Andric // amdgcn.ballot(i1 0) is zero. 819e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType())); 820e8d8bef9SDimitry Andric } 821e8d8bef9SDimitry Andric 822e8d8bef9SDimitry Andric if (Src->isOne()) { 823e8d8bef9SDimitry Andric // amdgcn.ballot(i1 1) is exec. 
824e8d8bef9SDimitry Andric const char *RegName = "exec"; 825e8d8bef9SDimitry Andric if (II.getType()->isIntegerTy(32)) 826e8d8bef9SDimitry Andric RegName = "exec_lo"; 827e8d8bef9SDimitry Andric else if (!II.getType()->isIntegerTy(64)) 828e8d8bef9SDimitry Andric break; 829e8d8bef9SDimitry Andric 830e8d8bef9SDimitry Andric Function *NewF = Intrinsic::getDeclaration( 831e8d8bef9SDimitry Andric II.getModule(), Intrinsic::read_register, II.getType()); 832e8d8bef9SDimitry Andric Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)}; 833e8d8bef9SDimitry Andric MDNode *MD = MDNode::get(II.getContext(), MDArgs); 834e8d8bef9SDimitry Andric Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)}; 835e8d8bef9SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall(NewF, Args); 836349cc55cSDimitry Andric NewCall->addFnAttr(Attribute::Convergent); 837e8d8bef9SDimitry Andric NewCall->takeName(&II); 838e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, NewCall); 839e8d8bef9SDimitry Andric } 840e8d8bef9SDimitry Andric } 841e8d8bef9SDimitry Andric break; 842e8d8bef9SDimitry Andric } 843e8d8bef9SDimitry Andric case Intrinsic::amdgcn_wqm_vote: { 844e8d8bef9SDimitry Andric // wqm_vote is identity when the argument is constant. 
845e8d8bef9SDimitry Andric if (!isa<Constant>(II.getArgOperand(0))) 846e8d8bef9SDimitry Andric break; 847e8d8bef9SDimitry Andric 848e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(0)); 849e8d8bef9SDimitry Andric } 850e8d8bef9SDimitry Andric case Intrinsic::amdgcn_kill: { 851e8d8bef9SDimitry Andric const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0)); 852e8d8bef9SDimitry Andric if (!C || !C->getZExtValue()) 853e8d8bef9SDimitry Andric break; 854e8d8bef9SDimitry Andric 855e8d8bef9SDimitry Andric // amdgcn.kill(i1 1) is a no-op 856e8d8bef9SDimitry Andric return IC.eraseInstFromFunction(II); 857e8d8bef9SDimitry Andric } 858e8d8bef9SDimitry Andric case Intrinsic::amdgcn_update_dpp: { 859e8d8bef9SDimitry Andric Value *Old = II.getArgOperand(0); 860e8d8bef9SDimitry Andric 861e8d8bef9SDimitry Andric auto *BC = cast<ConstantInt>(II.getArgOperand(5)); 862e8d8bef9SDimitry Andric auto *RM = cast<ConstantInt>(II.getArgOperand(3)); 863e8d8bef9SDimitry Andric auto *BM = cast<ConstantInt>(II.getArgOperand(4)); 864e8d8bef9SDimitry Andric if (BC->isZeroValue() || RM->getZExtValue() != 0xF || 865e8d8bef9SDimitry Andric BM->getZExtValue() != 0xF || isa<UndefValue>(Old)) 866e8d8bef9SDimitry Andric break; 867e8d8bef9SDimitry Andric 868e8d8bef9SDimitry Andric // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value. 869e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, UndefValue::get(Old->getType())); 870e8d8bef9SDimitry Andric } 871e8d8bef9SDimitry Andric case Intrinsic::amdgcn_permlane16: 872e8d8bef9SDimitry Andric case Intrinsic::amdgcn_permlanex16: { 873e8d8bef9SDimitry Andric // Discard vdst_in if it's not going to be read. 
874e8d8bef9SDimitry Andric Value *VDstIn = II.getArgOperand(0); 875e8d8bef9SDimitry Andric if (isa<UndefValue>(VDstIn)) 876e8d8bef9SDimitry Andric break; 877e8d8bef9SDimitry Andric 878e8d8bef9SDimitry Andric ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4)); 879e8d8bef9SDimitry Andric ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5)); 880e8d8bef9SDimitry Andric if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue()) 881e8d8bef9SDimitry Andric break; 882e8d8bef9SDimitry Andric 883e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType())); 884e8d8bef9SDimitry Andric } 885e8d8bef9SDimitry Andric case Intrinsic::amdgcn_readfirstlane: 886e8d8bef9SDimitry Andric case Intrinsic::amdgcn_readlane: { 887e8d8bef9SDimitry Andric // A constant value is trivially uniform. 888e8d8bef9SDimitry Andric if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) { 889e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, C); 890e8d8bef9SDimitry Andric } 891e8d8bef9SDimitry Andric 892e8d8bef9SDimitry Andric // The rest of these may not be safe if the exec may not be the same between 893e8d8bef9SDimitry Andric // the def and use. 
894e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 895e8d8bef9SDimitry Andric Instruction *SrcInst = dyn_cast<Instruction>(Src); 896e8d8bef9SDimitry Andric if (SrcInst && SrcInst->getParent() != II.getParent()) 897e8d8bef9SDimitry Andric break; 898e8d8bef9SDimitry Andric 899e8d8bef9SDimitry Andric // readfirstlane (readfirstlane x) -> readfirstlane x 900e8d8bef9SDimitry Andric // readlane (readfirstlane x), y -> readfirstlane x 901e8d8bef9SDimitry Andric if (match(Src, 902e8d8bef9SDimitry Andric PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) { 903e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Src); 904e8d8bef9SDimitry Andric } 905e8d8bef9SDimitry Andric 906e8d8bef9SDimitry Andric if (IID == Intrinsic::amdgcn_readfirstlane) { 907e8d8bef9SDimitry Andric // readfirstlane (readlane x, y) -> readlane x, y 908e8d8bef9SDimitry Andric if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) { 909e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Src); 910e8d8bef9SDimitry Andric } 911e8d8bef9SDimitry Andric } else { 912e8d8bef9SDimitry Andric // readlane (readlane x, y), y -> readlane x, y 913e8d8bef9SDimitry Andric if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>( 914e8d8bef9SDimitry Andric PatternMatch::m_Value(), 915e8d8bef9SDimitry Andric PatternMatch::m_Specific(II.getArgOperand(1))))) { 916e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Src); 917e8d8bef9SDimitry Andric } 918e8d8bef9SDimitry Andric } 919e8d8bef9SDimitry Andric 920e8d8bef9SDimitry Andric break; 921e8d8bef9SDimitry Andric } 922e8d8bef9SDimitry Andric case Intrinsic::amdgcn_ldexp: { 923e8d8bef9SDimitry Andric // FIXME: This doesn't introduce new instructions and belongs in 924e8d8bef9SDimitry Andric // InstructionSimplify. 
925e8d8bef9SDimitry Andric Type *Ty = II.getType(); 926e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 927e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 928e8d8bef9SDimitry Andric 929e8d8bef9SDimitry Andric // Folding undef to qnan is safe regardless of the FP mode. 930e8d8bef9SDimitry Andric if (isa<UndefValue>(Op0)) { 931e8d8bef9SDimitry Andric auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); 932e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, QNaN); 933e8d8bef9SDimitry Andric } 934e8d8bef9SDimitry Andric 935e8d8bef9SDimitry Andric const APFloat *C = nullptr; 936e8d8bef9SDimitry Andric match(Op0, PatternMatch::m_APFloat(C)); 937e8d8bef9SDimitry Andric 938e8d8bef9SDimitry Andric // FIXME: Should flush denorms depending on FP mode, but that's ignored 939e8d8bef9SDimitry Andric // everywhere else. 940e8d8bef9SDimitry Andric // 941e8d8bef9SDimitry Andric // These cases should be safe, even with strictfp. 942e8d8bef9SDimitry Andric // ldexp(0.0, x) -> 0.0 943e8d8bef9SDimitry Andric // ldexp(-0.0, x) -> -0.0 944e8d8bef9SDimitry Andric // ldexp(inf, x) -> inf 945e8d8bef9SDimitry Andric // ldexp(-inf, x) -> -inf 946e8d8bef9SDimitry Andric if (C && (C->isZero() || C->isInfinity())) { 947e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Op0); 948e8d8bef9SDimitry Andric } 949e8d8bef9SDimitry Andric 950e8d8bef9SDimitry Andric // With strictfp, be more careful about possibly needing to flush denormals 951e8d8bef9SDimitry Andric // or not, and snan behavior depends on ieee_mode. 
952e8d8bef9SDimitry Andric if (II.isStrictFP()) 953e8d8bef9SDimitry Andric break; 954e8d8bef9SDimitry Andric 955e8d8bef9SDimitry Andric if (C && C->isNaN()) { 956e8d8bef9SDimitry Andric // FIXME: We just need to make the nan quiet here, but that's unavailable 957e8d8bef9SDimitry Andric // on APFloat, only IEEEfloat 958e8d8bef9SDimitry Andric auto *Quieted = 959e8d8bef9SDimitry Andric ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven)); 960e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Quieted); 961e8d8bef9SDimitry Andric } 962e8d8bef9SDimitry Andric 963e8d8bef9SDimitry Andric // ldexp(x, 0) -> x 964e8d8bef9SDimitry Andric // ldexp(x, undef) -> x 965e8d8bef9SDimitry Andric if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) { 966e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Op0); 967e8d8bef9SDimitry Andric } 968e8d8bef9SDimitry Andric 969e8d8bef9SDimitry Andric break; 970e8d8bef9SDimitry Andric } 971e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fmul_legacy: { 972e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 973e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 974e8d8bef9SDimitry Andric 975e8d8bef9SDimitry Andric // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or 976e8d8bef9SDimitry Andric // infinity, gives +0.0. 977e8d8bef9SDimitry Andric // TODO: Move to InstSimplify? 978e8d8bef9SDimitry Andric if (match(Op0, PatternMatch::m_AnyZeroFP()) || 979e8d8bef9SDimitry Andric match(Op1, PatternMatch::m_AnyZeroFP())) 980e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType())); 981e8d8bef9SDimitry Andric 982e8d8bef9SDimitry Andric // If we can prove we don't have one of the special cases then we can use a 983e8d8bef9SDimitry Andric // normal fmul instruction instead. 
984e8d8bef9SDimitry Andric if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) { 985e8d8bef9SDimitry Andric auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II); 986e8d8bef9SDimitry Andric FMul->takeName(&II); 987e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, FMul); 988e8d8bef9SDimitry Andric } 989e8d8bef9SDimitry Andric break; 990e8d8bef9SDimitry Andric } 991e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fma_legacy: { 992e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 993e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 994e8d8bef9SDimitry Andric Value *Op2 = II.getArgOperand(2); 995e8d8bef9SDimitry Andric 996e8d8bef9SDimitry Andric // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or 997e8d8bef9SDimitry Andric // infinity, gives +0.0. 998e8d8bef9SDimitry Andric // TODO: Move to InstSimplify? 999e8d8bef9SDimitry Andric if (match(Op0, PatternMatch::m_AnyZeroFP()) || 1000e8d8bef9SDimitry Andric match(Op1, PatternMatch::m_AnyZeroFP())) { 1001e8d8bef9SDimitry Andric // It's tempting to just return Op2 here, but that would give the wrong 1002e8d8bef9SDimitry Andric // result if Op2 was -0.0. 1003e8d8bef9SDimitry Andric auto *Zero = ConstantFP::getNullValue(II.getType()); 1004e8d8bef9SDimitry Andric auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II); 1005e8d8bef9SDimitry Andric FAdd->takeName(&II); 1006e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, FAdd); 1007e8d8bef9SDimitry Andric } 1008e8d8bef9SDimitry Andric 1009e8d8bef9SDimitry Andric // If we can prove we don't have one of the special cases then we can use a 1010e8d8bef9SDimitry Andric // normal fma instead. 
1011e8d8bef9SDimitry Andric if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) { 1012e8d8bef9SDimitry Andric II.setCalledOperand(Intrinsic::getDeclaration( 1013e8d8bef9SDimitry Andric II.getModule(), Intrinsic::fma, II.getType())); 1014e8d8bef9SDimitry Andric return &II; 1015e8d8bef9SDimitry Andric } 1016e8d8bef9SDimitry Andric break; 1017e8d8bef9SDimitry Andric } 10180eae32dcSDimitry Andric case Intrinsic::amdgcn_is_shared: 10190eae32dcSDimitry Andric case Intrinsic::amdgcn_is_private: { 10200eae32dcSDimitry Andric if (isa<UndefValue>(II.getArgOperand(0))) 10210eae32dcSDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 10220eae32dcSDimitry Andric 10230eae32dcSDimitry Andric if (isa<ConstantPointerNull>(II.getArgOperand(0))) 10240eae32dcSDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType())); 10250eae32dcSDimitry Andric break; 10260eae32dcSDimitry Andric } 1027e8d8bef9SDimitry Andric default: { 1028e8d8bef9SDimitry Andric if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = 1029e8d8bef9SDimitry Andric AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) { 1030e8d8bef9SDimitry Andric return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC); 1031e8d8bef9SDimitry Andric } 1032e8d8bef9SDimitry Andric } 1033e8d8bef9SDimitry Andric } 1034e8d8bef9SDimitry Andric return None; 1035e8d8bef9SDimitry Andric } 1036e8d8bef9SDimitry Andric 1037e8d8bef9SDimitry Andric /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics. 1038e8d8bef9SDimitry Andric /// 1039e8d8bef9SDimitry Andric /// Note: This only supports non-TFE/LWE image intrinsic calls; those have 1040e8d8bef9SDimitry Andric /// struct returns. 
// Shrink a vector buffer/image load intrinsic to only its demanded elements.
//
// \param IC           InstCombiner used to build replacement instructions.
// \param II           The intrinsic call being simplified (vector result).
// \param DemandedElts Bitmask of result elements the caller actually uses.
// \param DMaskIdx     Index of the dmask argument for image intrinsics, or -1
//                     for buffer intrinsics (which have no dmask operand).
// \returns a replacement value for II, or nullptr if no change was made
//          (operands of II may still have been updated in place).
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                    IntrinsicInst &II,
                                                    APInt DemandedElts,
                                                    int DMaskIdx = -1) {

  auto *IIVTy = cast<FixedVectorType>(II.getType());
  unsigned VWidth = IIVTy->getNumElements();
  // A scalar (1-element) result cannot be shrunk any further.
  if (VWidth == 1)
    return nullptr;

  // Build any new instructions right before II; restore the previous insert
  // point when this function returns.
  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
  IC.Builder.SetInsertPoint(&II);

  // Assume the arguments are unchanged and later override them, if needed.
  SmallVector<Value *, 16> Args(II.args());

  if (DMaskIdx < 0) {
    // Buffer case.

    const unsigned ActiveBits = DemandedElts.getActiveBits();
    const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();

    // Start assuming the prefix of elements is demanded, but possibly clear
    // some other bits if there are trailing zeros (unused components at front)
    // and update offset.
    DemandedElts = (1 << ActiveBits) - 1;

    if (UnusedComponentsAtFront > 0) {
      // Sentinel meaning "this intrinsic's offset operand cannot be adjusted".
      static const unsigned InvalidOffsetIdx = 0xf;

      // Position of the byte-offset operand for each supported intrinsic.
      unsigned OffsetIdx;
      switch (II.getIntrinsicID()) {
      case Intrinsic::amdgcn_raw_buffer_load:
        OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_s_buffer_load:
        // If resulting type is vec3, there is no point in trimming the
        // load with updated offset, as the vec3 would most likely be widened to
        // vec4 anyway during lowering.
        if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
          OffsetIdx = InvalidOffsetIdx;
        else
          OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_struct_buffer_load:
        OffsetIdx = 2;
        break;
      default:
        // TODO: handle tbuffer* intrinsics.
        OffsetIdx = InvalidOffsetIdx;
        break;
      }

      if (OffsetIdx != InvalidOffsetIdx) {
        // Clear demanded bits and update the offset.
        DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
        auto *Offset = II.getArgOperand(OffsetIdx);
        unsigned SingleComponentSizeInBits =
            IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
        // Advance the offset past the skipped front components (bits -> bytes).
        unsigned OffsetAdd =
            UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
        auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
        Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
      }
    }
  } else {
    // Image case.

    ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;

    // Mask off values that are undefined because the dmask doesn't cover them
    DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;

    // Rebuild the dmask keeping only bits whose corresponding loaded element
    // is demanded. OrigLoadIdx walks result elements in dmask-bit order.
    unsigned NewDMaskVal = 0;
    unsigned OrigLoadIdx = 0;
    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
      const unsigned Bit = 1 << SrcIdx;
      if (!!(DMaskVal & Bit)) {
        if (!!DemandedElts[OrigLoadIdx])
          NewDMaskVal |= Bit;
        OrigLoadIdx++;
      }
    }

    if (DMaskVal != NewDMaskVal)
      Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
  }

  unsigned NewNumElts = DemandedElts.countPopulation();
  // Nothing demanded at all: the whole load result is dead.
  if (!NewNumElts)
    return UndefValue::get(II.getType());

  // Demanding a full prefix that covers the whole vector: the type cannot
  // shrink, so at most the dmask operand is updated in place.
  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (DMaskIdx >= 0)
      II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
    return nullptr;
  }

  // Validate function argument and return types, extracting overloaded types
  // along the way.
  SmallVector<Type *, 6> OverloadTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
    return nullptr;

  Module *M = II.getParent()->getParent()->getParent();
  Type *EltTy = IIVTy->getElementType();
  // Overloaded return type slot 0 becomes the narrowed result type.
  Type *NewTy =
      (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);

  OverloadTys[0] = NewTy;
  Function *NewIntrin =
      Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);

  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
  NewCall->takeName(&II);
  NewCall->copyMetadata(II);

  if (NewNumElts == 1) {
    // Scalar load: re-insert it at the single demanded lane of an otherwise
    // undef vector so the original vector type is preserved for users.
    return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
                                          NewCall,
                                          DemandedElts.countTrailingZeros());
  }

  // Scatter the narrowed load's lanes back to their original positions.
  // Index NewNumElts is past the end of NewCall's result, yielding an
  // undef lane for elements that were not demanded.
  SmallVector<int, 8> EltMask;
  unsigned NewLoadIdx = 0;
  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
    if (!!DemandedElts[OrigLoadIdx])
      EltMask.push_back(NewLoadIdx++);
    else
      EltMask.push_back(NewNumElts);
  }

  Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);

  return Shuffle;
}

Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  // Buffer-style loads: narrow via the shared helper. No dmask index is
  // passed for these forms.
  case Intrinsic::amdgcn_buffer_load:
  case Intrinsic::amdgcn_buffer_load_format:
  case Intrinsic::amdgcn_raw_buffer_load:
  case Intrinsic::amdgcn_raw_buffer_load_format:
  case Intrinsic::amdgcn_raw_tbuffer_load:
  case Intrinsic::amdgcn_s_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load_format:
  case Intrinsic::amdgcn_struct_tbuffer_load:
  case Intrinsic::amdgcn_tbuffer_load:
    return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
  default: {
    // Image intrinsics are recognized via the TableGen-generated dmask
    // table; for them the dmask operand index 0 is passed to the helper.
    if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
      return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
    }
    break;
  }
  }
  // Any other intrinsic: no target-specific simplification.
  return None;
}