//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "AMDGPUtti"

namespace {

// Entry type of the AMDGPUImageDMaskIntrinsic table whose implementation is
// pulled in from InstCombineTables.inc below.
// NOTE(review): Intr presumably holds an intrinsic ID and the .inc file is
// presumably generated - confirm against the table definition.
struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

// Request the table implementation (not just its declarations) from the
// included file.
#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"

} // end anonymous namespace

// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
//
// A single NaN input is folded to minnum, so we rely on that folding for
// handling NaNs; this helper asserts that none of its inputs compares
// unordered.
45e8d8bef9SDimitry Andric static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, 46e8d8bef9SDimitry Andric const APFloat &Src2) { 47e8d8bef9SDimitry Andric APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2); 48e8d8bef9SDimitry Andric 49e8d8bef9SDimitry Andric APFloat::cmpResult Cmp0 = Max3.compare(Src0); 50e8d8bef9SDimitry Andric assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately"); 51e8d8bef9SDimitry Andric if (Cmp0 == APFloat::cmpEqual) 52e8d8bef9SDimitry Andric return maxnum(Src1, Src2); 53e8d8bef9SDimitry Andric 54e8d8bef9SDimitry Andric APFloat::cmpResult Cmp1 = Max3.compare(Src1); 55e8d8bef9SDimitry Andric assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately"); 56e8d8bef9SDimitry Andric if (Cmp1 == APFloat::cmpEqual) 57e8d8bef9SDimitry Andric return maxnum(Src0, Src2); 58e8d8bef9SDimitry Andric 59e8d8bef9SDimitry Andric return maxnum(Src0, Src1); 60e8d8bef9SDimitry Andric } 61e8d8bef9SDimitry Andric 62e8d8bef9SDimitry Andric // Check if a value can be converted to a 16-bit value without losing 63e8d8bef9SDimitry Andric // precision. 6404eeddc0SDimitry Andric // The value is expected to be either a float (IsFloat = true) or an unsigned 6504eeddc0SDimitry Andric // integer (IsFloat = false). 6604eeddc0SDimitry Andric static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) { 67e8d8bef9SDimitry Andric Type *VTy = V.getType(); 68e8d8bef9SDimitry Andric if (VTy->isHalfTy() || VTy->isIntegerTy(16)) { 69e8d8bef9SDimitry Andric // The value is already 16-bit, so we don't want to convert to 16-bit again! 70e8d8bef9SDimitry Andric return false; 71e8d8bef9SDimitry Andric } 7204eeddc0SDimitry Andric if (IsFloat) { 73e8d8bef9SDimitry Andric if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) { 7404eeddc0SDimitry Andric // We need to check that if we cast the index down to a half, we do not 7504eeddc0SDimitry Andric // lose precision. 
76e8d8bef9SDimitry Andric APFloat FloatValue(ConstFloat->getValueAPF()); 77e8d8bef9SDimitry Andric bool LosesInfo = true; 7804eeddc0SDimitry Andric FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, 7904eeddc0SDimitry Andric &LosesInfo); 80e8d8bef9SDimitry Andric return !LosesInfo; 81e8d8bef9SDimitry Andric } 8204eeddc0SDimitry Andric } else { 8304eeddc0SDimitry Andric if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) { 8404eeddc0SDimitry Andric // We need to check that if we cast the index down to an i16, we do not 8504eeddc0SDimitry Andric // lose precision. 8604eeddc0SDimitry Andric APInt IntValue(ConstInt->getValue()); 8704eeddc0SDimitry Andric return IntValue.getActiveBits() <= 16; 8804eeddc0SDimitry Andric } 8904eeddc0SDimitry Andric } 9004eeddc0SDimitry Andric 91e8d8bef9SDimitry Andric Value *CastSrc; 9204eeddc0SDimitry Andric bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) 9304eeddc0SDimitry Andric : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc))); 9404eeddc0SDimitry Andric if (IsExt) { 95e8d8bef9SDimitry Andric Type *CastSrcTy = CastSrc->getType(); 96e8d8bef9SDimitry Andric if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16)) 97e8d8bef9SDimitry Andric return true; 98e8d8bef9SDimitry Andric } 99e8d8bef9SDimitry Andric 100e8d8bef9SDimitry Andric return false; 101e8d8bef9SDimitry Andric } 102e8d8bef9SDimitry Andric 103e8d8bef9SDimitry Andric // Convert a value to 16-bit. 
104e8d8bef9SDimitry Andric static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) { 105e8d8bef9SDimitry Andric Type *VTy = V.getType(); 106e8d8bef9SDimitry Andric if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V)) 107e8d8bef9SDimitry Andric return cast<Instruction>(&V)->getOperand(0); 108e8d8bef9SDimitry Andric if (VTy->isIntegerTy()) 109e8d8bef9SDimitry Andric return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false); 110e8d8bef9SDimitry Andric if (VTy->isFloatingPointTy()) 111e8d8bef9SDimitry Andric return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext())); 112e8d8bef9SDimitry Andric 113e8d8bef9SDimitry Andric llvm_unreachable("Should never be called!"); 114e8d8bef9SDimitry Andric } 115e8d8bef9SDimitry Andric 11681ad6265SDimitry Andric /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with 11781ad6265SDimitry Andric /// modified arguments (based on OldIntr) and replaces InstToReplace with 11881ad6265SDimitry Andric /// this newly created intrinsic call. 
119bdd1243dSDimitry Andric static std::optional<Instruction *> modifyIntrinsicCall( 12081ad6265SDimitry Andric IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, 12181ad6265SDimitry Andric InstCombiner &IC, 12204eeddc0SDimitry Andric std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)> 12304eeddc0SDimitry Andric Func) { 12404eeddc0SDimitry Andric SmallVector<Type *, 4> ArgTys; 12581ad6265SDimitry Andric if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys)) 126bdd1243dSDimitry Andric return std::nullopt; 12704eeddc0SDimitry Andric 12881ad6265SDimitry Andric SmallVector<Value *, 8> Args(OldIntr.args()); 12904eeddc0SDimitry Andric 13004eeddc0SDimitry Andric // Modify arguments and types 13104eeddc0SDimitry Andric Func(Args, ArgTys); 13204eeddc0SDimitry Andric 13381ad6265SDimitry Andric Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys); 13404eeddc0SDimitry Andric 13504eeddc0SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall(I, Args); 13681ad6265SDimitry Andric NewCall->takeName(&OldIntr); 13781ad6265SDimitry Andric NewCall->copyMetadata(OldIntr); 13804eeddc0SDimitry Andric if (isa<FPMathOperator>(NewCall)) 13981ad6265SDimitry Andric NewCall->copyFastMathFlags(&OldIntr); 14004eeddc0SDimitry Andric 14104eeddc0SDimitry Andric // Erase and replace uses 14281ad6265SDimitry Andric if (!InstToReplace.getType()->isVoidTy()) 14381ad6265SDimitry Andric IC.replaceInstUsesWith(InstToReplace, NewCall); 14481ad6265SDimitry Andric 14581ad6265SDimitry Andric bool RemoveOldIntr = &OldIntr != &InstToReplace; 14681ad6265SDimitry Andric 14781ad6265SDimitry Andric auto RetValue = IC.eraseInstFromFunction(InstToReplace); 14881ad6265SDimitry Andric if (RemoveOldIntr) 14981ad6265SDimitry Andric IC.eraseInstFromFunction(OldIntr); 15081ad6265SDimitry Andric 15181ad6265SDimitry Andric return RetValue; 15204eeddc0SDimitry Andric } 15304eeddc0SDimitry Andric 154bdd1243dSDimitry Andric static 
std::optional<Instruction *> 155e8d8bef9SDimitry Andric simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, 156e8d8bef9SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, 157e8d8bef9SDimitry Andric IntrinsicInst &II, InstCombiner &IC) { 15804eeddc0SDimitry Andric // Optimize _L to _LZ when _L is zero 15904eeddc0SDimitry Andric if (const auto *LZMappingInfo = 16004eeddc0SDimitry Andric AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) { 16104eeddc0SDimitry Andric if (auto *ConstantLod = 16204eeddc0SDimitry Andric dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) { 16304eeddc0SDimitry Andric if (ConstantLod->isZero() || ConstantLod->isNegative()) { 16404eeddc0SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = 16504eeddc0SDimitry Andric AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ, 16604eeddc0SDimitry Andric ImageDimIntr->Dim); 16704eeddc0SDimitry Andric return modifyIntrinsicCall( 16881ad6265SDimitry Andric II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { 16904eeddc0SDimitry Andric Args.erase(Args.begin() + ImageDimIntr->LodIndex); 17004eeddc0SDimitry Andric }); 17104eeddc0SDimitry Andric } 17204eeddc0SDimitry Andric } 17304eeddc0SDimitry Andric } 17404eeddc0SDimitry Andric 17504eeddc0SDimitry Andric // Optimize _mip away, when 'lod' is zero 17604eeddc0SDimitry Andric if (const auto *MIPMappingInfo = 17704eeddc0SDimitry Andric AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { 17804eeddc0SDimitry Andric if (auto *ConstantMip = 17904eeddc0SDimitry Andric dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) { 18004eeddc0SDimitry Andric if (ConstantMip->isZero()) { 18104eeddc0SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = 18204eeddc0SDimitry Andric AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP, 18304eeddc0SDimitry Andric ImageDimIntr->Dim); 18404eeddc0SDimitry Andric return modifyIntrinsicCall( 18581ad6265SDimitry Andric II, II, 
NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { 18604eeddc0SDimitry Andric Args.erase(Args.begin() + ImageDimIntr->MipIndex); 18704eeddc0SDimitry Andric }); 18804eeddc0SDimitry Andric } 18904eeddc0SDimitry Andric } 19004eeddc0SDimitry Andric } 19104eeddc0SDimitry Andric 19204eeddc0SDimitry Andric // Optimize _bias away when 'bias' is zero 19304eeddc0SDimitry Andric if (const auto *BiasMappingInfo = 19404eeddc0SDimitry Andric AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) { 19504eeddc0SDimitry Andric if (auto *ConstantBias = 19604eeddc0SDimitry Andric dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) { 19704eeddc0SDimitry Andric if (ConstantBias->isZero()) { 19804eeddc0SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = 19904eeddc0SDimitry Andric AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias, 20004eeddc0SDimitry Andric ImageDimIntr->Dim); 20104eeddc0SDimitry Andric return modifyIntrinsicCall( 20281ad6265SDimitry Andric II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { 20304eeddc0SDimitry Andric Args.erase(Args.begin() + ImageDimIntr->BiasIndex); 20404eeddc0SDimitry Andric ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg); 20504eeddc0SDimitry Andric }); 20604eeddc0SDimitry Andric } 20704eeddc0SDimitry Andric } 20804eeddc0SDimitry Andric } 20904eeddc0SDimitry Andric 21004eeddc0SDimitry Andric // Optimize _offset away when 'offset' is zero 21104eeddc0SDimitry Andric if (const auto *OffsetMappingInfo = 21204eeddc0SDimitry Andric AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) { 21304eeddc0SDimitry Andric if (auto *ConstantOffset = 21404eeddc0SDimitry Andric dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) { 21504eeddc0SDimitry Andric if (ConstantOffset->isZero()) { 21604eeddc0SDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = 21704eeddc0SDimitry Andric AMDGPU::getImageDimIntrinsicByBaseOpcode( 21804eeddc0SDimitry Andric 
OffsetMappingInfo->NoOffset, ImageDimIntr->Dim); 21904eeddc0SDimitry Andric return modifyIntrinsicCall( 22081ad6265SDimitry Andric II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { 22104eeddc0SDimitry Andric Args.erase(Args.begin() + ImageDimIntr->OffsetIndex); 22204eeddc0SDimitry Andric }); 22304eeddc0SDimitry Andric } 22404eeddc0SDimitry Andric } 22504eeddc0SDimitry Andric } 22604eeddc0SDimitry Andric 22781ad6265SDimitry Andric // Try to use D16 22881ad6265SDimitry Andric if (ST->hasD16Images()) { 22981ad6265SDimitry Andric 23081ad6265SDimitry Andric const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 23181ad6265SDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode); 23281ad6265SDimitry Andric 23381ad6265SDimitry Andric if (BaseOpcode->HasD16) { 23481ad6265SDimitry Andric 23581ad6265SDimitry Andric // If the only use of image intrinsic is a fptrunc (with conversion to 23681ad6265SDimitry Andric // half) then both fptrunc and image intrinsic will be replaced with image 23781ad6265SDimitry Andric // intrinsic with D16 flag. 23881ad6265SDimitry Andric if (II.hasOneUse()) { 23981ad6265SDimitry Andric Instruction *User = II.user_back(); 24081ad6265SDimitry Andric 24181ad6265SDimitry Andric if (User->getOpcode() == Instruction::FPTrunc && 24281ad6265SDimitry Andric User->getType()->getScalarType()->isHalfTy()) { 24381ad6265SDimitry Andric 24481ad6265SDimitry Andric return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC, 24581ad6265SDimitry Andric [&](auto &Args, auto &ArgTys) { 24681ad6265SDimitry Andric // Change return type of image intrinsic. 24781ad6265SDimitry Andric // Set it to return type of fptrunc. 
24881ad6265SDimitry Andric ArgTys[0] = User->getType(); 24981ad6265SDimitry Andric }); 25081ad6265SDimitry Andric } 25181ad6265SDimitry Andric } 25281ad6265SDimitry Andric } 25381ad6265SDimitry Andric } 25481ad6265SDimitry Andric 25504eeddc0SDimitry Andric // Try to use A16 or G16 256e8d8bef9SDimitry Andric if (!ST->hasA16() && !ST->hasG16()) 257bdd1243dSDimitry Andric return std::nullopt; 258e8d8bef9SDimitry Andric 25904eeddc0SDimitry Andric // Address is interpreted as float if the instruction has a sampler or as 26004eeddc0SDimitry Andric // unsigned int if there is no sampler. 26104eeddc0SDimitry Andric bool HasSampler = 26204eeddc0SDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler; 263e8d8bef9SDimitry Andric bool FloatCoord = false; 264e8d8bef9SDimitry Andric // true means derivatives can be converted to 16 bit, coordinates not 265e8d8bef9SDimitry Andric bool OnlyDerivatives = false; 266e8d8bef9SDimitry Andric 267e8d8bef9SDimitry Andric for (unsigned OperandIndex = ImageDimIntr->GradientStart; 268e8d8bef9SDimitry Andric OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) { 269e8d8bef9SDimitry Andric Value *Coord = II.getOperand(OperandIndex); 270e8d8bef9SDimitry Andric // If the values are not derived from 16-bit values, we cannot optimize. 
27104eeddc0SDimitry Andric if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) { 272e8d8bef9SDimitry Andric if (OperandIndex < ImageDimIntr->CoordStart || 273e8d8bef9SDimitry Andric ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) { 274bdd1243dSDimitry Andric return std::nullopt; 275e8d8bef9SDimitry Andric } 276e8d8bef9SDimitry Andric // All gradients can be converted, so convert only them 277e8d8bef9SDimitry Andric OnlyDerivatives = true; 278e8d8bef9SDimitry Andric break; 279e8d8bef9SDimitry Andric } 280e8d8bef9SDimitry Andric 281e8d8bef9SDimitry Andric assert(OperandIndex == ImageDimIntr->GradientStart || 282e8d8bef9SDimitry Andric FloatCoord == Coord->getType()->isFloatingPointTy()); 283e8d8bef9SDimitry Andric FloatCoord = Coord->getType()->isFloatingPointTy(); 284e8d8bef9SDimitry Andric } 285e8d8bef9SDimitry Andric 28604eeddc0SDimitry Andric if (!OnlyDerivatives && !ST->hasA16()) 287e8d8bef9SDimitry Andric OnlyDerivatives = true; // Only supports G16 28804eeddc0SDimitry Andric 28904eeddc0SDimitry Andric // Check if there is a bias parameter and if it can be converted to f16 29004eeddc0SDimitry Andric if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { 29104eeddc0SDimitry Andric Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); 29204eeddc0SDimitry Andric assert(HasSampler && 29304eeddc0SDimitry Andric "Only image instructions with a sampler can have a bias"); 29404eeddc0SDimitry Andric if (!canSafelyConvertTo16Bit(*Bias, HasSampler)) 29504eeddc0SDimitry Andric OnlyDerivatives = true; 296e8d8bef9SDimitry Andric } 297e8d8bef9SDimitry Andric 29804eeddc0SDimitry Andric if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart == 29904eeddc0SDimitry Andric ImageDimIntr->CoordStart)) 300bdd1243dSDimitry Andric return std::nullopt; 30104eeddc0SDimitry Andric 302e8d8bef9SDimitry Andric Type *CoordType = FloatCoord ? 
Type::getHalfTy(II.getContext()) 303e8d8bef9SDimitry Andric : Type::getInt16Ty(II.getContext()); 304e8d8bef9SDimitry Andric 30504eeddc0SDimitry Andric return modifyIntrinsicCall( 30681ad6265SDimitry Andric II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) { 307e8d8bef9SDimitry Andric ArgTys[ImageDimIntr->GradientTyArg] = CoordType; 30804eeddc0SDimitry Andric if (!OnlyDerivatives) { 309e8d8bef9SDimitry Andric ArgTys[ImageDimIntr->CoordTyArg] = CoordType; 310e8d8bef9SDimitry Andric 31104eeddc0SDimitry Andric // Change the bias type 31204eeddc0SDimitry Andric if (ImageDimIntr->NumBiasArgs != 0) 31304eeddc0SDimitry Andric ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext()); 31404eeddc0SDimitry Andric } 315e8d8bef9SDimitry Andric 316e8d8bef9SDimitry Andric unsigned EndIndex = 317e8d8bef9SDimitry Andric OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd; 318e8d8bef9SDimitry Andric for (unsigned OperandIndex = ImageDimIntr->GradientStart; 319e8d8bef9SDimitry Andric OperandIndex < EndIndex; OperandIndex++) { 320e8d8bef9SDimitry Andric Args[OperandIndex] = 321e8d8bef9SDimitry Andric convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder); 322e8d8bef9SDimitry Andric } 323e8d8bef9SDimitry Andric 32404eeddc0SDimitry Andric // Convert the bias 32504eeddc0SDimitry Andric if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { 32604eeddc0SDimitry Andric Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); 32704eeddc0SDimitry Andric Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder); 32804eeddc0SDimitry Andric } 32904eeddc0SDimitry Andric }); 330e8d8bef9SDimitry Andric } 331e8d8bef9SDimitry Andric 33206c3fb27SDimitry Andric bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Instruction &I, 33306c3fb27SDimitry Andric const Value *Op0, const Value *Op1, 334e8d8bef9SDimitry Andric InstCombiner &IC) const { 335e8d8bef9SDimitry Andric // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or 
336e8d8bef9SDimitry Andric // infinity, gives +0.0. If we can prove we don't have one of the special 337e8d8bef9SDimitry Andric // cases then we can use a normal multiply instead. 338e8d8bef9SDimitry Andric // TODO: Create and use isKnownFiniteNonZero instead of just matching 339e8d8bef9SDimitry Andric // constants here. 340e8d8bef9SDimitry Andric if (match(Op0, PatternMatch::m_FiniteNonZero()) || 341e8d8bef9SDimitry Andric match(Op1, PatternMatch::m_FiniteNonZero())) { 342e8d8bef9SDimitry Andric // One operand is not zero or infinity or NaN. 343e8d8bef9SDimitry Andric return true; 344e8d8bef9SDimitry Andric } 34506c3fb27SDimitry Andric 346*0fca6ea1SDimitry Andric SimplifyQuery SQ = IC.getSimplifyQuery().getWithInstruction(&I); 347*0fca6ea1SDimitry Andric if (isKnownNeverInfOrNaN(Op0, /*Depth=*/0, SQ) && 348*0fca6ea1SDimitry Andric isKnownNeverInfOrNaN(Op1, /*Depth=*/0, SQ)) { 349e8d8bef9SDimitry Andric // Neither operand is infinity or NaN. 350e8d8bef9SDimitry Andric return true; 351e8d8bef9SDimitry Andric } 352e8d8bef9SDimitry Andric return false; 353e8d8bef9SDimitry Andric } 354e8d8bef9SDimitry Andric 35506c3fb27SDimitry Andric /// Match an fpext from half to float, or a constant we can convert. 
35606c3fb27SDimitry Andric static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc) { 35706c3fb27SDimitry Andric if (match(Arg, m_OneUse(m_FPExt(m_Value(FPExtSrc))))) 35806c3fb27SDimitry Andric return FPExtSrc->getType()->isHalfTy(); 35906c3fb27SDimitry Andric 36006c3fb27SDimitry Andric ConstantFP *CFP; 36106c3fb27SDimitry Andric if (match(Arg, m_ConstantFP(CFP))) { 36206c3fb27SDimitry Andric bool LosesInfo; 36306c3fb27SDimitry Andric APFloat Val(CFP->getValueAPF()); 36406c3fb27SDimitry Andric Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo); 36506c3fb27SDimitry Andric if (LosesInfo) 36606c3fb27SDimitry Andric return false; 36706c3fb27SDimitry Andric 36806c3fb27SDimitry Andric FPExtSrc = ConstantFP::get(Type::getHalfTy(Arg->getContext()), Val); 36906c3fb27SDimitry Andric return true; 37006c3fb27SDimitry Andric } 37106c3fb27SDimitry Andric 37206c3fb27SDimitry Andric return false; 37306c3fb27SDimitry Andric } 37406c3fb27SDimitry Andric 37506c3fb27SDimitry Andric // Trim all zero components from the end of the vector \p UseV and return 37606c3fb27SDimitry Andric // an appropriate bitset with known elements. 
37706c3fb27SDimitry Andric static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, 37806c3fb27SDimitry Andric Instruction *I) { 37906c3fb27SDimitry Andric auto *VTy = cast<FixedVectorType>(UseV->getType()); 38006c3fb27SDimitry Andric unsigned VWidth = VTy->getNumElements(); 38106c3fb27SDimitry Andric APInt DemandedElts = APInt::getAllOnes(VWidth); 38206c3fb27SDimitry Andric 38306c3fb27SDimitry Andric for (int i = VWidth - 1; i > 0; --i) { 38406c3fb27SDimitry Andric auto *Elt = findScalarElement(UseV, i); 38506c3fb27SDimitry Andric if (!Elt) 38606c3fb27SDimitry Andric break; 38706c3fb27SDimitry Andric 38806c3fb27SDimitry Andric if (auto *ConstElt = dyn_cast<Constant>(Elt)) { 38906c3fb27SDimitry Andric if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt)) 39006c3fb27SDimitry Andric break; 39106c3fb27SDimitry Andric } else { 39206c3fb27SDimitry Andric break; 39306c3fb27SDimitry Andric } 39406c3fb27SDimitry Andric 39506c3fb27SDimitry Andric DemandedElts.clearBit(i); 39606c3fb27SDimitry Andric } 39706c3fb27SDimitry Andric 39806c3fb27SDimitry Andric return DemandedElts; 39906c3fb27SDimitry Andric } 40006c3fb27SDimitry Andric 4017a6dacacSDimitry Andric // Trim elements of the end of the vector \p V, if they are 4027a6dacacSDimitry Andric // equal to the first element of the vector. 
4037a6dacacSDimitry Andric static APInt defaultComponentBroadcast(Value *V) { 4047a6dacacSDimitry Andric auto *VTy = cast<FixedVectorType>(V->getType()); 4057a6dacacSDimitry Andric unsigned VWidth = VTy->getNumElements(); 4067a6dacacSDimitry Andric APInt DemandedElts = APInt::getAllOnes(VWidth); 4077a6dacacSDimitry Andric Value *FirstComponent = findScalarElement(V, 0); 4087a6dacacSDimitry Andric 4097a6dacacSDimitry Andric SmallVector<int> ShuffleMask; 4107a6dacacSDimitry Andric if (auto *SVI = dyn_cast<ShuffleVectorInst>(V)) 4117a6dacacSDimitry Andric SVI->getShuffleMask(ShuffleMask); 4127a6dacacSDimitry Andric 4137a6dacacSDimitry Andric for (int I = VWidth - 1; I > 0; --I) { 4147a6dacacSDimitry Andric if (ShuffleMask.empty()) { 4157a6dacacSDimitry Andric auto *Elt = findScalarElement(V, I); 4167a6dacacSDimitry Andric if (!Elt || (Elt != FirstComponent && !isa<UndefValue>(Elt))) 4177a6dacacSDimitry Andric break; 4187a6dacacSDimitry Andric } else { 4197a6dacacSDimitry Andric // Detect identical elements in the shufflevector result, even though 4207a6dacacSDimitry Andric // findScalarElement cannot tell us what that element is. 
4217a6dacacSDimitry Andric if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem) 4227a6dacacSDimitry Andric break; 4237a6dacacSDimitry Andric } 4247a6dacacSDimitry Andric DemandedElts.clearBit(I); 4257a6dacacSDimitry Andric } 4267a6dacacSDimitry Andric 4277a6dacacSDimitry Andric return DemandedElts; 4287a6dacacSDimitry Andric } 4297a6dacacSDimitry Andric 43006c3fb27SDimitry Andric static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, 43106c3fb27SDimitry Andric IntrinsicInst &II, 43206c3fb27SDimitry Andric APInt DemandedElts, 43306c3fb27SDimitry Andric int DMaskIdx = -1, 43406c3fb27SDimitry Andric bool IsLoad = true); 43506c3fb27SDimitry Andric 4365f757f3fSDimitry Andric /// Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt) 4375f757f3fSDimitry Andric static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp) { 4385f757f3fSDimitry Andric return (SqrtOp->getType()->isFloatTy() && 4395f757f3fSDimitry Andric (SqrtOp->hasApproxFunc() || SqrtOp->getFPAccuracy() >= 1.0f)) || 4405f757f3fSDimitry Andric SqrtOp->getType()->isHalfTy(); 4415f757f3fSDimitry Andric } 4425f757f3fSDimitry Andric 443bdd1243dSDimitry Andric std::optional<Instruction *> 444e8d8bef9SDimitry Andric GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { 445e8d8bef9SDimitry Andric Intrinsic::ID IID = II.getIntrinsicID(); 446e8d8bef9SDimitry Andric switch (IID) { 447e8d8bef9SDimitry Andric case Intrinsic::amdgcn_rcp: { 448e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 449e8d8bef9SDimitry Andric 450e8d8bef9SDimitry Andric // TODO: Move to ConstantFolding/InstSimplify? 
451e8d8bef9SDimitry Andric if (isa<UndefValue>(Src)) { 452e8d8bef9SDimitry Andric Type *Ty = II.getType(); 453e8d8bef9SDimitry Andric auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); 454e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, QNaN); 455e8d8bef9SDimitry Andric } 456e8d8bef9SDimitry Andric 457e8d8bef9SDimitry Andric if (II.isStrictFP()) 458e8d8bef9SDimitry Andric break; 459e8d8bef9SDimitry Andric 460e8d8bef9SDimitry Andric if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) { 461e8d8bef9SDimitry Andric const APFloat &ArgVal = C->getValueAPF(); 462e8d8bef9SDimitry Andric APFloat Val(ArgVal.getSemantics(), 1); 463e8d8bef9SDimitry Andric Val.divide(ArgVal, APFloat::rmNearestTiesToEven); 464e8d8bef9SDimitry Andric 465e8d8bef9SDimitry Andric // This is more precise than the instruction may give. 466e8d8bef9SDimitry Andric // 467e8d8bef9SDimitry Andric // TODO: The instruction always flushes denormal results (except for f16), 468e8d8bef9SDimitry Andric // should this also? 469e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val)); 470e8d8bef9SDimitry Andric } 471e8d8bef9SDimitry Andric 4725f757f3fSDimitry Andric FastMathFlags FMF = cast<FPMathOperator>(II).getFastMathFlags(); 4735f757f3fSDimitry Andric if (!FMF.allowContract()) 4745f757f3fSDimitry Andric break; 4755f757f3fSDimitry Andric auto *SrcCI = dyn_cast<IntrinsicInst>(Src); 4765f757f3fSDimitry Andric if (!SrcCI) 4775f757f3fSDimitry Andric break; 4785f757f3fSDimitry Andric 4795f757f3fSDimitry Andric auto IID = SrcCI->getIntrinsicID(); 4805f757f3fSDimitry Andric // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable 4815f757f3fSDimitry Andric // 4825f757f3fSDimitry Andric // llvm.amdgcn.rcp(llvm.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable and 4835f757f3fSDimitry Andric // relaxed. 
4845f757f3fSDimitry Andric if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) { 4855f757f3fSDimitry Andric const FPMathOperator *SqrtOp = cast<FPMathOperator>(SrcCI); 4865f757f3fSDimitry Andric FastMathFlags InnerFMF = SqrtOp->getFastMathFlags(); 4875f757f3fSDimitry Andric if (!InnerFMF.allowContract() || !SrcCI->hasOneUse()) 4885f757f3fSDimitry Andric break; 4895f757f3fSDimitry Andric 4905f757f3fSDimitry Andric if (IID == Intrinsic::sqrt && !canContractSqrtToRsq(SqrtOp)) 4915f757f3fSDimitry Andric break; 4925f757f3fSDimitry Andric 4935f757f3fSDimitry Andric Function *NewDecl = Intrinsic::getDeclaration( 4945f757f3fSDimitry Andric SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()}); 4955f757f3fSDimitry Andric 4965f757f3fSDimitry Andric InnerFMF |= FMF; 4975f757f3fSDimitry Andric II.setFastMathFlags(InnerFMF); 4985f757f3fSDimitry Andric 4995f757f3fSDimitry Andric II.setCalledFunction(NewDecl); 5005f757f3fSDimitry Andric return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0)); 5015f757f3fSDimitry Andric } 5025f757f3fSDimitry Andric 503e8d8bef9SDimitry Andric break; 504e8d8bef9SDimitry Andric } 505bdd1243dSDimitry Andric case Intrinsic::amdgcn_sqrt: 506e8d8bef9SDimitry Andric case Intrinsic::amdgcn_rsq: { 507e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 508e8d8bef9SDimitry Andric 509e8d8bef9SDimitry Andric // TODO: Move to ConstantFolding/InstSimplify? 510e8d8bef9SDimitry Andric if (isa<UndefValue>(Src)) { 511e8d8bef9SDimitry Andric Type *Ty = II.getType(); 512e8d8bef9SDimitry Andric auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); 513e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, QNaN); 514e8d8bef9SDimitry Andric } 515e8d8bef9SDimitry Andric 5165f757f3fSDimitry Andric // f16 amdgcn.sqrt is identical to regular sqrt. 
5175f757f3fSDimitry Andric if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) { 5185f757f3fSDimitry Andric Function *NewDecl = Intrinsic::getDeclaration( 5195f757f3fSDimitry Andric II.getModule(), Intrinsic::sqrt, {II.getType()}); 5205f757f3fSDimitry Andric II.setCalledFunction(NewDecl); 5215f757f3fSDimitry Andric return &II; 5225f757f3fSDimitry Andric } 5235f757f3fSDimitry Andric 524e8d8bef9SDimitry Andric break; 525e8d8bef9SDimitry Andric } 52606c3fb27SDimitry Andric case Intrinsic::amdgcn_log: 52706c3fb27SDimitry Andric case Intrinsic::amdgcn_exp2: { 52806c3fb27SDimitry Andric const bool IsLog = IID == Intrinsic::amdgcn_log; 52906c3fb27SDimitry Andric const bool IsExp = IID == Intrinsic::amdgcn_exp2; 53006c3fb27SDimitry Andric Value *Src = II.getArgOperand(0); 53106c3fb27SDimitry Andric Type *Ty = II.getType(); 53206c3fb27SDimitry Andric 53306c3fb27SDimitry Andric if (isa<PoisonValue>(Src)) 53406c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, Src); 53506c3fb27SDimitry Andric 53606c3fb27SDimitry Andric if (IC.getSimplifyQuery().isUndefValue(Src)) 53706c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty)); 53806c3fb27SDimitry Andric 53906c3fb27SDimitry Andric if (ConstantFP *C = dyn_cast<ConstantFP>(Src)) { 54006c3fb27SDimitry Andric if (C->isInfinity()) { 54106c3fb27SDimitry Andric // exp2(+inf) -> +inf 54206c3fb27SDimitry Andric // log2(+inf) -> +inf 54306c3fb27SDimitry Andric if (!C->isNegative()) 54406c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, C); 54506c3fb27SDimitry Andric 54606c3fb27SDimitry Andric // exp2(-inf) -> 0 54706c3fb27SDimitry Andric if (IsExp && C->isNegative()) 54806c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::getZero(Ty)); 54906c3fb27SDimitry Andric } 55006c3fb27SDimitry Andric 55106c3fb27SDimitry Andric if (II.isStrictFP()) 55206c3fb27SDimitry Andric break; 55306c3fb27SDimitry Andric 55406c3fb27SDimitry Andric if (C->isNaN()) { 55506c3fb27SDimitry Andric 
Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet()); 55606c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, Quieted); 55706c3fb27SDimitry Andric } 55806c3fb27SDimitry Andric 55906c3fb27SDimitry Andric // f32 instruction doesn't handle denormals, f16 does. 56006c3fb27SDimitry Andric if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) { 56106c3fb27SDimitry Andric Constant *FoldedValue = IsLog ? ConstantFP::getInfinity(Ty, true) 56206c3fb27SDimitry Andric : ConstantFP::get(Ty, 1.0); 56306c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, FoldedValue); 56406c3fb27SDimitry Andric } 56506c3fb27SDimitry Andric 56606c3fb27SDimitry Andric if (IsLog && C->isNegative()) 56706c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty)); 56806c3fb27SDimitry Andric 56906c3fb27SDimitry Andric // TODO: Full constant folding matching hardware behavior. 57006c3fb27SDimitry Andric } 57106c3fb27SDimitry Andric 57206c3fb27SDimitry Andric break; 57306c3fb27SDimitry Andric } 574e8d8bef9SDimitry Andric case Intrinsic::amdgcn_frexp_mant: 575e8d8bef9SDimitry Andric case Intrinsic::amdgcn_frexp_exp: { 576e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 577e8d8bef9SDimitry Andric if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) { 578e8d8bef9SDimitry Andric int Exp; 579e8d8bef9SDimitry Andric APFloat Significand = 580e8d8bef9SDimitry Andric frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven); 581e8d8bef9SDimitry Andric 582e8d8bef9SDimitry Andric if (IID == Intrinsic::amdgcn_frexp_mant) { 583e8d8bef9SDimitry Andric return IC.replaceInstUsesWith( 584e8d8bef9SDimitry Andric II, ConstantFP::get(II.getContext(), Significand)); 585e8d8bef9SDimitry Andric } 586e8d8bef9SDimitry Andric 587e8d8bef9SDimitry Andric // Match instruction special case behavior. 
588e8d8bef9SDimitry Andric if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf) 589e8d8bef9SDimitry Andric Exp = 0; 590e8d8bef9SDimitry Andric 591e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp)); 592e8d8bef9SDimitry Andric } 593e8d8bef9SDimitry Andric 594e8d8bef9SDimitry Andric if (isa<UndefValue>(Src)) { 595e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 596e8d8bef9SDimitry Andric } 597e8d8bef9SDimitry Andric 598e8d8bef9SDimitry Andric break; 599e8d8bef9SDimitry Andric } 600e8d8bef9SDimitry Andric case Intrinsic::amdgcn_class: { 601e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 602e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 603e8d8bef9SDimitry Andric const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1); 60406c3fb27SDimitry Andric if (CMask) { 60506c3fb27SDimitry Andric II.setCalledOperand(Intrinsic::getDeclaration( 60606c3fb27SDimitry Andric II.getModule(), Intrinsic::is_fpclass, Src0->getType())); 60706c3fb27SDimitry Andric 60806c3fb27SDimitry Andric // Clamp any excess bits, as they're illegal for the generic intrinsic. 60906c3fb27SDimitry Andric II.setArgOperand(1, ConstantInt::get(Src1->getType(), 61006c3fb27SDimitry Andric CMask->getZExtValue() & fcAllFlags)); 61106c3fb27SDimitry Andric return &II; 612e8d8bef9SDimitry Andric } 613e8d8bef9SDimitry Andric 61406c3fb27SDimitry Andric // Propagate poison. 
61506c3fb27SDimitry Andric if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1)) 61606c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType())); 617e8d8bef9SDimitry Andric 61806c3fb27SDimitry Andric // llvm.amdgcn.class(_, undef) -> false 61906c3fb27SDimitry Andric if (IC.getSimplifyQuery().isUndefValue(Src1)) 620e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false)); 62106c3fb27SDimitry Andric 62206c3fb27SDimitry Andric // llvm.amdgcn.class(undef, mask) -> mask != 0 62306c3fb27SDimitry Andric if (IC.getSimplifyQuery().isUndefValue(Src0)) { 62406c3fb27SDimitry Andric Value *CmpMask = IC.Builder.CreateICmpNE( 62506c3fb27SDimitry Andric Src1, ConstantInt::getNullValue(Src1->getType())); 62606c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, CmpMask); 627e8d8bef9SDimitry Andric } 628e8d8bef9SDimitry Andric break; 629e8d8bef9SDimitry Andric } 630e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pkrtz: { 631e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 632e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 633e8d8bef9SDimitry Andric if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) { 634e8d8bef9SDimitry Andric if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) { 635e8d8bef9SDimitry Andric const fltSemantics &HalfSem = 636e8d8bef9SDimitry Andric II.getType()->getScalarType()->getFltSemantics(); 637e8d8bef9SDimitry Andric bool LosesInfo; 638e8d8bef9SDimitry Andric APFloat Val0 = C0->getValueAPF(); 639e8d8bef9SDimitry Andric APFloat Val1 = C1->getValueAPF(); 640e8d8bef9SDimitry Andric Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo); 641e8d8bef9SDimitry Andric Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo); 642e8d8bef9SDimitry Andric 643e8d8bef9SDimitry Andric Constant *Folded = 644e8d8bef9SDimitry Andric ConstantVector::get({ConstantFP::get(II.getContext(), Val0), 645e8d8bef9SDimitry Andric ConstantFP::get(II.getContext(), Val1)}); 
646e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Folded); 647e8d8bef9SDimitry Andric } 648e8d8bef9SDimitry Andric } 649e8d8bef9SDimitry Andric 650e8d8bef9SDimitry Andric if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) { 651e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 652e8d8bef9SDimitry Andric } 653e8d8bef9SDimitry Andric 654e8d8bef9SDimitry Andric break; 655e8d8bef9SDimitry Andric } 656e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pknorm_i16: 657e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pknorm_u16: 658e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pk_i16: 659e8d8bef9SDimitry Andric case Intrinsic::amdgcn_cvt_pk_u16: { 660e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 661e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 662e8d8bef9SDimitry Andric 663e8d8bef9SDimitry Andric if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) { 664e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 665e8d8bef9SDimitry Andric } 666e8d8bef9SDimitry Andric 667e8d8bef9SDimitry Andric break; 668e8d8bef9SDimitry Andric } 669e8d8bef9SDimitry Andric case Intrinsic::amdgcn_ubfe: 670e8d8bef9SDimitry Andric case Intrinsic::amdgcn_sbfe: { 671e8d8bef9SDimitry Andric // Decompose simple cases into standard shifts. 
672e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 673e8d8bef9SDimitry Andric if (isa<UndefValue>(Src)) { 674e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Src); 675e8d8bef9SDimitry Andric } 676e8d8bef9SDimitry Andric 677e8d8bef9SDimitry Andric unsigned Width; 678e8d8bef9SDimitry Andric Type *Ty = II.getType(); 679e8d8bef9SDimitry Andric unsigned IntSize = Ty->getIntegerBitWidth(); 680e8d8bef9SDimitry Andric 681e8d8bef9SDimitry Andric ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2)); 682e8d8bef9SDimitry Andric if (CWidth) { 683e8d8bef9SDimitry Andric Width = CWidth->getZExtValue(); 684e8d8bef9SDimitry Andric if ((Width & (IntSize - 1)) == 0) { 685e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty)); 686e8d8bef9SDimitry Andric } 687e8d8bef9SDimitry Andric 688e8d8bef9SDimitry Andric // Hardware ignores high bits, so remove those. 689e8d8bef9SDimitry Andric if (Width >= IntSize) { 690e8d8bef9SDimitry Andric return IC.replaceOperand( 691e8d8bef9SDimitry Andric II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1))); 692e8d8bef9SDimitry Andric } 693e8d8bef9SDimitry Andric } 694e8d8bef9SDimitry Andric 695e8d8bef9SDimitry Andric unsigned Offset; 696e8d8bef9SDimitry Andric ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1)); 697e8d8bef9SDimitry Andric if (COffset) { 698e8d8bef9SDimitry Andric Offset = COffset->getZExtValue(); 699e8d8bef9SDimitry Andric if (Offset >= IntSize) { 700e8d8bef9SDimitry Andric return IC.replaceOperand( 701e8d8bef9SDimitry Andric II, 1, 702e8d8bef9SDimitry Andric ConstantInt::get(COffset->getType(), Offset & (IntSize - 1))); 703e8d8bef9SDimitry Andric } 704e8d8bef9SDimitry Andric } 705e8d8bef9SDimitry Andric 706e8d8bef9SDimitry Andric bool Signed = IID == Intrinsic::amdgcn_sbfe; 707e8d8bef9SDimitry Andric 708e8d8bef9SDimitry Andric if (!CWidth || !COffset) 709e8d8bef9SDimitry Andric break; 710e8d8bef9SDimitry Andric 711349cc55cSDimitry Andric // The 
case of Width == 0 is handled above, which makes this transformation 712e8d8bef9SDimitry Andric // safe. If Width == 0, then the ashr and lshr instructions become poison 713e8d8bef9SDimitry Andric // value since the shift amount would be equal to the bit size. 714e8d8bef9SDimitry Andric assert(Width != 0); 715e8d8bef9SDimitry Andric 716e8d8bef9SDimitry Andric // TODO: This allows folding to undef when the hardware has specific 717e8d8bef9SDimitry Andric // behavior? 718e8d8bef9SDimitry Andric if (Offset + Width < IntSize) { 719e8d8bef9SDimitry Andric Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width); 720e8d8bef9SDimitry Andric Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width) 721e8d8bef9SDimitry Andric : IC.Builder.CreateLShr(Shl, IntSize - Width); 722e8d8bef9SDimitry Andric RightShift->takeName(&II); 723e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, RightShift); 724e8d8bef9SDimitry Andric } 725e8d8bef9SDimitry Andric 726e8d8bef9SDimitry Andric Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset) 727e8d8bef9SDimitry Andric : IC.Builder.CreateLShr(Src, Offset); 728e8d8bef9SDimitry Andric 729e8d8bef9SDimitry Andric RightShift->takeName(&II); 730e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, RightShift); 731e8d8bef9SDimitry Andric } 732e8d8bef9SDimitry Andric case Intrinsic::amdgcn_exp: 73381ad6265SDimitry Andric case Intrinsic::amdgcn_exp_row: 734e8d8bef9SDimitry Andric case Intrinsic::amdgcn_exp_compr: { 735e8d8bef9SDimitry Andric ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1)); 736e8d8bef9SDimitry Andric unsigned EnBits = En->getZExtValue(); 737e8d8bef9SDimitry Andric if (EnBits == 0xf) 738e8d8bef9SDimitry Andric break; // All inputs enabled. 739e8d8bef9SDimitry Andric 740e8d8bef9SDimitry Andric bool IsCompr = IID == Intrinsic::amdgcn_exp_compr; 741e8d8bef9SDimitry Andric bool Changed = false; 742e8d8bef9SDimitry Andric for (int I = 0; I < (IsCompr ? 
2 : 4); ++I) { 743e8d8bef9SDimitry Andric if ((!IsCompr && (EnBits & (1 << I)) == 0) || 744e8d8bef9SDimitry Andric (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) { 745e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(I + 2); 746e8d8bef9SDimitry Andric if (!isa<UndefValue>(Src)) { 747e8d8bef9SDimitry Andric IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType())); 748e8d8bef9SDimitry Andric Changed = true; 749e8d8bef9SDimitry Andric } 750e8d8bef9SDimitry Andric } 751e8d8bef9SDimitry Andric } 752e8d8bef9SDimitry Andric 753e8d8bef9SDimitry Andric if (Changed) { 754e8d8bef9SDimitry Andric return &II; 755e8d8bef9SDimitry Andric } 756e8d8bef9SDimitry Andric 757e8d8bef9SDimitry Andric break; 758e8d8bef9SDimitry Andric } 759e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fmed3: { 760e8d8bef9SDimitry Andric // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled 761e8d8bef9SDimitry Andric // for the shader. 762e8d8bef9SDimitry Andric 763e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 764e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 765e8d8bef9SDimitry Andric Value *Src2 = II.getArgOperand(2); 766e8d8bef9SDimitry Andric 767e8d8bef9SDimitry Andric // Checking for NaN before canonicalization provides better fidelity when 768e8d8bef9SDimitry Andric // mapping other operations onto fmed3 since the order of operands is 769e8d8bef9SDimitry Andric // unchanged. 
770*0fca6ea1SDimitry Andric Value *V = nullptr; 771e8d8bef9SDimitry Andric if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) { 772*0fca6ea1SDimitry Andric V = IC.Builder.CreateMinNum(Src1, Src2); 773e8d8bef9SDimitry Andric } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) { 774*0fca6ea1SDimitry Andric V = IC.Builder.CreateMinNum(Src0, Src2); 775e8d8bef9SDimitry Andric } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) { 776*0fca6ea1SDimitry Andric V = IC.Builder.CreateMaxNum(Src0, Src1); 777e8d8bef9SDimitry Andric } 778e8d8bef9SDimitry Andric 779*0fca6ea1SDimitry Andric if (V) { 780*0fca6ea1SDimitry Andric if (auto *CI = dyn_cast<CallInst>(V)) { 781*0fca6ea1SDimitry Andric CI->copyFastMathFlags(&II); 782*0fca6ea1SDimitry Andric CI->takeName(&II); 783*0fca6ea1SDimitry Andric } 784*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, V); 785e8d8bef9SDimitry Andric } 786e8d8bef9SDimitry Andric 787e8d8bef9SDimitry Andric bool Swap = false; 788e8d8bef9SDimitry Andric // Canonicalize constants to RHS operands. 
789e8d8bef9SDimitry Andric // 790e8d8bef9SDimitry Andric // fmed3(c0, x, c1) -> fmed3(x, c0, c1) 791e8d8bef9SDimitry Andric if (isa<Constant>(Src0) && !isa<Constant>(Src1)) { 792e8d8bef9SDimitry Andric std::swap(Src0, Src1); 793e8d8bef9SDimitry Andric Swap = true; 794e8d8bef9SDimitry Andric } 795e8d8bef9SDimitry Andric 796e8d8bef9SDimitry Andric if (isa<Constant>(Src1) && !isa<Constant>(Src2)) { 797e8d8bef9SDimitry Andric std::swap(Src1, Src2); 798e8d8bef9SDimitry Andric Swap = true; 799e8d8bef9SDimitry Andric } 800e8d8bef9SDimitry Andric 801e8d8bef9SDimitry Andric if (isa<Constant>(Src0) && !isa<Constant>(Src1)) { 802e8d8bef9SDimitry Andric std::swap(Src0, Src1); 803e8d8bef9SDimitry Andric Swap = true; 804e8d8bef9SDimitry Andric } 805e8d8bef9SDimitry Andric 806e8d8bef9SDimitry Andric if (Swap) { 807e8d8bef9SDimitry Andric II.setArgOperand(0, Src0); 808e8d8bef9SDimitry Andric II.setArgOperand(1, Src1); 809e8d8bef9SDimitry Andric II.setArgOperand(2, Src2); 810e8d8bef9SDimitry Andric return &II; 811e8d8bef9SDimitry Andric } 812e8d8bef9SDimitry Andric 813e8d8bef9SDimitry Andric if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) { 814e8d8bef9SDimitry Andric if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) { 815e8d8bef9SDimitry Andric if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) { 816e8d8bef9SDimitry Andric APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), 817e8d8bef9SDimitry Andric C2->getValueAPF()); 818e8d8bef9SDimitry Andric return IC.replaceInstUsesWith( 819e8d8bef9SDimitry Andric II, ConstantFP::get(IC.Builder.getContext(), Result)); 820e8d8bef9SDimitry Andric } 821e8d8bef9SDimitry Andric } 822e8d8bef9SDimitry Andric } 823e8d8bef9SDimitry Andric 82406c3fb27SDimitry Andric if (!ST->hasMed3_16()) 82506c3fb27SDimitry Andric break; 82606c3fb27SDimitry Andric 82706c3fb27SDimitry Andric Value *X, *Y, *Z; 82806c3fb27SDimitry Andric 82906c3fb27SDimitry Andric // Repeat floating-point width reduction done for minnum/maxnum. 
83006c3fb27SDimitry Andric // fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z)) 83106c3fb27SDimitry Andric if (matchFPExtFromF16(Src0, X) && matchFPExtFromF16(Src1, Y) && 83206c3fb27SDimitry Andric matchFPExtFromF16(Src2, Z)) { 83306c3fb27SDimitry Andric Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()}, 83406c3fb27SDimitry Andric {X, Y, Z}, &II, II.getName()); 83506c3fb27SDimitry Andric return new FPExtInst(NewCall, II.getType()); 83606c3fb27SDimitry Andric } 83706c3fb27SDimitry Andric 838e8d8bef9SDimitry Andric break; 839e8d8bef9SDimitry Andric } 840e8d8bef9SDimitry Andric case Intrinsic::amdgcn_icmp: 841e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fcmp: { 842e8d8bef9SDimitry Andric const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2)); 843e8d8bef9SDimitry Andric // Guard against invalid arguments. 844e8d8bef9SDimitry Andric int64_t CCVal = CC->getZExtValue(); 845e8d8bef9SDimitry Andric bool IsInteger = IID == Intrinsic::amdgcn_icmp; 846e8d8bef9SDimitry Andric if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE || 847e8d8bef9SDimitry Andric CCVal > CmpInst::LAST_ICMP_PREDICATE)) || 848e8d8bef9SDimitry Andric (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE || 849e8d8bef9SDimitry Andric CCVal > CmpInst::LAST_FCMP_PREDICATE))) 850e8d8bef9SDimitry Andric break; 851e8d8bef9SDimitry Andric 852e8d8bef9SDimitry Andric Value *Src0 = II.getArgOperand(0); 853e8d8bef9SDimitry Andric Value *Src1 = II.getArgOperand(1); 854e8d8bef9SDimitry Andric 855e8d8bef9SDimitry Andric if (auto *CSrc0 = dyn_cast<Constant>(Src0)) { 856e8d8bef9SDimitry Andric if (auto *CSrc1 = dyn_cast<Constant>(Src1)) { 857*0fca6ea1SDimitry Andric Constant *CCmp = ConstantFoldCompareInstOperands( 858*0fca6ea1SDimitry Andric (ICmpInst::Predicate)CCVal, CSrc0, CSrc1, DL); 859*0fca6ea1SDimitry Andric if (CCmp && CCmp->isNullValue()) { 860e8d8bef9SDimitry Andric return IC.replaceInstUsesWith( 8615f757f3fSDimitry Andric II, IC.Builder.CreateSExt(CCmp, 
II.getType())); 862e8d8bef9SDimitry Andric } 863e8d8bef9SDimitry Andric 864e8d8bef9SDimitry Andric // The result of V_ICMP/V_FCMP assembly instructions (which this 865e8d8bef9SDimitry Andric // intrinsic exposes) is one bit per thread, masked with the EXEC 866e8d8bef9SDimitry Andric // register (which contains the bitmask of live threads). So a 867e8d8bef9SDimitry Andric // comparison that always returns true is the same as a read of the 868e8d8bef9SDimitry Andric // EXEC register. 869e8d8bef9SDimitry Andric Function *NewF = Intrinsic::getDeclaration( 870e8d8bef9SDimitry Andric II.getModule(), Intrinsic::read_register, II.getType()); 871e8d8bef9SDimitry Andric Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")}; 872e8d8bef9SDimitry Andric MDNode *MD = MDNode::get(II.getContext(), MDArgs); 873e8d8bef9SDimitry Andric Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)}; 874e8d8bef9SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall(NewF, Args); 875349cc55cSDimitry Andric NewCall->addFnAttr(Attribute::Convergent); 876e8d8bef9SDimitry Andric NewCall->takeName(&II); 877e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, NewCall); 878e8d8bef9SDimitry Andric } 879e8d8bef9SDimitry Andric 880e8d8bef9SDimitry Andric // Canonicalize constants to RHS. 
881e8d8bef9SDimitry Andric CmpInst::Predicate SwapPred = 882e8d8bef9SDimitry Andric CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal)); 883e8d8bef9SDimitry Andric II.setArgOperand(0, Src1); 884e8d8bef9SDimitry Andric II.setArgOperand(1, Src0); 885e8d8bef9SDimitry Andric II.setArgOperand( 886e8d8bef9SDimitry Andric 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred))); 887e8d8bef9SDimitry Andric return &II; 888e8d8bef9SDimitry Andric } 889e8d8bef9SDimitry Andric 890e8d8bef9SDimitry Andric if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE) 891e8d8bef9SDimitry Andric break; 892e8d8bef9SDimitry Andric 893e8d8bef9SDimitry Andric // Canonicalize compare eq with true value to compare != 0 894e8d8bef9SDimitry Andric // llvm.amdgcn.icmp(zext (i1 x), 1, eq) 895e8d8bef9SDimitry Andric // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne) 896e8d8bef9SDimitry Andric // llvm.amdgcn.icmp(sext (i1 x), -1, eq) 897e8d8bef9SDimitry Andric // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne) 898e8d8bef9SDimitry Andric Value *ExtSrc; 899e8d8bef9SDimitry Andric if (CCVal == CmpInst::ICMP_EQ && 900e8d8bef9SDimitry Andric ((match(Src1, PatternMatch::m_One()) && 901e8d8bef9SDimitry Andric match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) || 902e8d8bef9SDimitry Andric (match(Src1, PatternMatch::m_AllOnes()) && 903e8d8bef9SDimitry Andric match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) && 904e8d8bef9SDimitry Andric ExtSrc->getType()->isIntegerTy(1)) { 905e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType())); 906e8d8bef9SDimitry Andric IC.replaceOperand(II, 2, 907e8d8bef9SDimitry Andric ConstantInt::get(CC->getType(), CmpInst::ICMP_NE)); 908e8d8bef9SDimitry Andric return &II; 909e8d8bef9SDimitry Andric } 910e8d8bef9SDimitry Andric 911e8d8bef9SDimitry Andric CmpInst::Predicate SrcPred; 912e8d8bef9SDimitry Andric Value *SrcLHS; 913e8d8bef9SDimitry Andric Value *SrcRHS; 914e8d8bef9SDimitry Andric 915e8d8bef9SDimitry Andric // Fold 
compare eq/ne with 0 from a compare result as the predicate to the 916e8d8bef9SDimitry Andric // intrinsic. The typical use is a wave vote function in the library, which 917e8d8bef9SDimitry Andric // will be fed from a user code condition compared with 0. Fold in the 918e8d8bef9SDimitry Andric // redundant compare. 919e8d8bef9SDimitry Andric 920e8d8bef9SDimitry Andric // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne) 921e8d8bef9SDimitry Andric // -> llvm.amdgcn.[if]cmp(a, b, pred) 922e8d8bef9SDimitry Andric // 923e8d8bef9SDimitry Andric // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq) 924e8d8bef9SDimitry Andric // -> llvm.amdgcn.[if]cmp(a, b, inv pred) 925e8d8bef9SDimitry Andric if (match(Src1, PatternMatch::m_Zero()) && 926e8d8bef9SDimitry Andric match(Src0, PatternMatch::m_ZExtOrSExt( 927e8d8bef9SDimitry Andric m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS), 928e8d8bef9SDimitry Andric PatternMatch::m_Value(SrcRHS))))) { 929e8d8bef9SDimitry Andric if (CCVal == CmpInst::ICMP_EQ) 930e8d8bef9SDimitry Andric SrcPred = CmpInst::getInversePredicate(SrcPred); 931e8d8bef9SDimitry Andric 932e8d8bef9SDimitry Andric Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) 933e8d8bef9SDimitry Andric ? Intrinsic::amdgcn_fcmp 934e8d8bef9SDimitry Andric : Intrinsic::amdgcn_icmp; 935e8d8bef9SDimitry Andric 936e8d8bef9SDimitry Andric Type *Ty = SrcLHS->getType(); 937e8d8bef9SDimitry Andric if (auto *CmpType = dyn_cast<IntegerType>(Ty)) { 938e8d8bef9SDimitry Andric // Promote to next legal integer type. 939e8d8bef9SDimitry Andric unsigned Width = CmpType->getBitWidth(); 940e8d8bef9SDimitry Andric unsigned NewWidth = Width; 941e8d8bef9SDimitry Andric 942e8d8bef9SDimitry Andric // Don't do anything for i1 comparisons. 
943e8d8bef9SDimitry Andric if (Width == 1) 944e8d8bef9SDimitry Andric break; 945e8d8bef9SDimitry Andric 946e8d8bef9SDimitry Andric if (Width <= 16) 947e8d8bef9SDimitry Andric NewWidth = 16; 948e8d8bef9SDimitry Andric else if (Width <= 32) 949e8d8bef9SDimitry Andric NewWidth = 32; 950e8d8bef9SDimitry Andric else if (Width <= 64) 951e8d8bef9SDimitry Andric NewWidth = 64; 952*0fca6ea1SDimitry Andric else 953e8d8bef9SDimitry Andric break; // Can't handle this. 954e8d8bef9SDimitry Andric 955e8d8bef9SDimitry Andric if (Width != NewWidth) { 956e8d8bef9SDimitry Andric IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth); 957e8d8bef9SDimitry Andric if (CmpInst::isSigned(SrcPred)) { 958e8d8bef9SDimitry Andric SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy); 959e8d8bef9SDimitry Andric SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy); 960e8d8bef9SDimitry Andric } else { 961e8d8bef9SDimitry Andric SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy); 962e8d8bef9SDimitry Andric SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy); 963e8d8bef9SDimitry Andric } 964e8d8bef9SDimitry Andric } 965e8d8bef9SDimitry Andric } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy()) 966e8d8bef9SDimitry Andric break; 967e8d8bef9SDimitry Andric 968e8d8bef9SDimitry Andric Function *NewF = Intrinsic::getDeclaration( 969e8d8bef9SDimitry Andric II.getModule(), NewIID, {II.getType(), SrcLHS->getType()}); 970e8d8bef9SDimitry Andric Value *Args[] = {SrcLHS, SrcRHS, 971e8d8bef9SDimitry Andric ConstantInt::get(CC->getType(), SrcPred)}; 972e8d8bef9SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall(NewF, Args); 973e8d8bef9SDimitry Andric NewCall->takeName(&II); 974e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, NewCall); 975e8d8bef9SDimitry Andric } 976e8d8bef9SDimitry Andric 977e8d8bef9SDimitry Andric break; 978e8d8bef9SDimitry Andric } 97906c3fb27SDimitry Andric case Intrinsic::amdgcn_mbcnt_hi: { 98006c3fb27SDimitry Andric // exec_hi is all 0, so this is just a copy. 
98106c3fb27SDimitry Andric if (ST->isWave32()) 98206c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(1)); 98306c3fb27SDimitry Andric break; 98406c3fb27SDimitry Andric } 985e8d8bef9SDimitry Andric case Intrinsic::amdgcn_ballot: { 986e8d8bef9SDimitry Andric if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 987e8d8bef9SDimitry Andric if (Src->isZero()) { 988e8d8bef9SDimitry Andric // amdgcn.ballot(i1 0) is zero. 989e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType())); 990e8d8bef9SDimitry Andric } 991e8d8bef9SDimitry Andric } 992*0fca6ea1SDimitry Andric if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) { 993*0fca6ea1SDimitry Andric // %b64 = call i64 ballot.i64(...) 994*0fca6ea1SDimitry Andric // => 995*0fca6ea1SDimitry Andric // %b32 = call i32 ballot.i32(...) 996*0fca6ea1SDimitry Andric // %b64 = zext i32 %b32 to i64 997*0fca6ea1SDimitry Andric Value *Call = IC.Builder.CreateZExt( 998*0fca6ea1SDimitry Andric IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot, 999*0fca6ea1SDimitry Andric {IC.Builder.getInt32Ty()}, 1000*0fca6ea1SDimitry Andric {II.getArgOperand(0)}), 1001*0fca6ea1SDimitry Andric II.getType()); 1002*0fca6ea1SDimitry Andric Call->takeName(&II); 1003*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, Call); 1004*0fca6ea1SDimitry Andric } 1005e8d8bef9SDimitry Andric break; 1006e8d8bef9SDimitry Andric } 1007e8d8bef9SDimitry Andric case Intrinsic::amdgcn_wqm_vote: { 1008e8d8bef9SDimitry Andric // wqm_vote is identity when the argument is constant. 
1009e8d8bef9SDimitry Andric if (!isa<Constant>(II.getArgOperand(0))) 1010e8d8bef9SDimitry Andric break; 1011e8d8bef9SDimitry Andric 1012e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(0)); 1013e8d8bef9SDimitry Andric } 1014e8d8bef9SDimitry Andric case Intrinsic::amdgcn_kill: { 1015e8d8bef9SDimitry Andric const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0)); 1016e8d8bef9SDimitry Andric if (!C || !C->getZExtValue()) 1017e8d8bef9SDimitry Andric break; 1018e8d8bef9SDimitry Andric 1019e8d8bef9SDimitry Andric // amdgcn.kill(i1 1) is a no-op 1020e8d8bef9SDimitry Andric return IC.eraseInstFromFunction(II); 1021e8d8bef9SDimitry Andric } 1022e8d8bef9SDimitry Andric case Intrinsic::amdgcn_update_dpp: { 1023e8d8bef9SDimitry Andric Value *Old = II.getArgOperand(0); 1024e8d8bef9SDimitry Andric 1025e8d8bef9SDimitry Andric auto *BC = cast<ConstantInt>(II.getArgOperand(5)); 1026e8d8bef9SDimitry Andric auto *RM = cast<ConstantInt>(II.getArgOperand(3)); 1027e8d8bef9SDimitry Andric auto *BM = cast<ConstantInt>(II.getArgOperand(4)); 1028e8d8bef9SDimitry Andric if (BC->isZeroValue() || RM->getZExtValue() != 0xF || 1029e8d8bef9SDimitry Andric BM->getZExtValue() != 0xF || isa<UndefValue>(Old)) 1030e8d8bef9SDimitry Andric break; 1031e8d8bef9SDimitry Andric 1032e8d8bef9SDimitry Andric // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value. 1033e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, UndefValue::get(Old->getType())); 1034e8d8bef9SDimitry Andric } 1035e8d8bef9SDimitry Andric case Intrinsic::amdgcn_permlane16: 10365f757f3fSDimitry Andric case Intrinsic::amdgcn_permlane16_var: 10375f757f3fSDimitry Andric case Intrinsic::amdgcn_permlanex16: 10385f757f3fSDimitry Andric case Intrinsic::amdgcn_permlanex16_var: { 1039e8d8bef9SDimitry Andric // Discard vdst_in if it's not going to be read. 
1040e8d8bef9SDimitry Andric Value *VDstIn = II.getArgOperand(0); 1041e8d8bef9SDimitry Andric if (isa<UndefValue>(VDstIn)) 1042e8d8bef9SDimitry Andric break; 1043e8d8bef9SDimitry Andric 10445f757f3fSDimitry Andric // FetchInvalid operand idx. 10455f757f3fSDimitry Andric unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 || 10465f757f3fSDimitry Andric IID == Intrinsic::amdgcn_permlanex16) 10475f757f3fSDimitry Andric ? 4 /* for permlane16 and permlanex16 */ 10485f757f3fSDimitry Andric : 3; /* for permlane16_var and permlanex16_var */ 10495f757f3fSDimitry Andric 10505f757f3fSDimitry Andric // BoundCtrl operand idx. 10515f757f3fSDimitry Andric // For permlane16 and permlanex16 it should be 5 10525f757f3fSDimitry Andric // For Permlane16_var and permlanex16_var it should be 4 10535f757f3fSDimitry Andric unsigned int BcIdx = FiIdx + 1; 10545f757f3fSDimitry Andric 10555f757f3fSDimitry Andric ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(FiIdx)); 10565f757f3fSDimitry Andric ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(BcIdx)); 1057e8d8bef9SDimitry Andric if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue()) 1058e8d8bef9SDimitry Andric break; 1059e8d8bef9SDimitry Andric 1060e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType())); 1061e8d8bef9SDimitry Andric } 106281ad6265SDimitry Andric case Intrinsic::amdgcn_permlane64: 106381ad6265SDimitry Andric // A constant value is trivially uniform. 106481ad6265SDimitry Andric if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) { 106581ad6265SDimitry Andric return IC.replaceInstUsesWith(II, C); 106681ad6265SDimitry Andric } 106781ad6265SDimitry Andric break; 1068e8d8bef9SDimitry Andric case Intrinsic::amdgcn_readfirstlane: 1069e8d8bef9SDimitry Andric case Intrinsic::amdgcn_readlane: { 1070e8d8bef9SDimitry Andric // A constant value is trivially uniform. 
1071e8d8bef9SDimitry Andric if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) { 1072e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, C); 1073e8d8bef9SDimitry Andric } 1074e8d8bef9SDimitry Andric 1075e8d8bef9SDimitry Andric // The rest of these may not be safe if the exec may not be the same between 1076e8d8bef9SDimitry Andric // the def and use. 1077e8d8bef9SDimitry Andric Value *Src = II.getArgOperand(0); 1078e8d8bef9SDimitry Andric Instruction *SrcInst = dyn_cast<Instruction>(Src); 1079e8d8bef9SDimitry Andric if (SrcInst && SrcInst->getParent() != II.getParent()) 1080e8d8bef9SDimitry Andric break; 1081e8d8bef9SDimitry Andric 1082e8d8bef9SDimitry Andric // readfirstlane (readfirstlane x) -> readfirstlane x 1083e8d8bef9SDimitry Andric // readlane (readfirstlane x), y -> readfirstlane x 1084e8d8bef9SDimitry Andric if (match(Src, 1085e8d8bef9SDimitry Andric PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) { 1086e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Src); 1087e8d8bef9SDimitry Andric } 1088e8d8bef9SDimitry Andric 1089e8d8bef9SDimitry Andric if (IID == Intrinsic::amdgcn_readfirstlane) { 1090e8d8bef9SDimitry Andric // readfirstlane (readlane x, y) -> readlane x, y 1091e8d8bef9SDimitry Andric if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) { 1092e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Src); 1093e8d8bef9SDimitry Andric } 1094e8d8bef9SDimitry Andric } else { 1095e8d8bef9SDimitry Andric // readlane (readlane x, y), y -> readlane x, y 1096e8d8bef9SDimitry Andric if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>( 1097e8d8bef9SDimitry Andric PatternMatch::m_Value(), 1098e8d8bef9SDimitry Andric PatternMatch::m_Specific(II.getArgOperand(1))))) { 1099e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Src); 1100e8d8bef9SDimitry Andric } 1101e8d8bef9SDimitry Andric } 1102e8d8bef9SDimitry Andric 1103e8d8bef9SDimitry Andric break; 1104e8d8bef9SDimitry Andric } 
1105*0fca6ea1SDimitry Andric case Intrinsic::amdgcn_trig_preop: { 1106*0fca6ea1SDimitry Andric // The intrinsic is declared with name mangling, but currently the 1107*0fca6ea1SDimitry Andric // instruction only exists for f64 1108*0fca6ea1SDimitry Andric if (!II.getType()->isDoubleTy()) 1109*0fca6ea1SDimitry Andric break; 1110*0fca6ea1SDimitry Andric 1111*0fca6ea1SDimitry Andric Value *Src = II.getArgOperand(0); 1112*0fca6ea1SDimitry Andric Value *Segment = II.getArgOperand(1); 1113*0fca6ea1SDimitry Andric if (isa<PoisonValue>(Src) || isa<PoisonValue>(Segment)) 1114*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType())); 1115*0fca6ea1SDimitry Andric 1116*0fca6ea1SDimitry Andric if (isa<UndefValue>(Src)) { 1117*0fca6ea1SDimitry Andric auto *QNaN = ConstantFP::get( 1118*0fca6ea1SDimitry Andric II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics())); 1119*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, QNaN); 1120*0fca6ea1SDimitry Andric } 1121*0fca6ea1SDimitry Andric 1122*0fca6ea1SDimitry Andric const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src); 1123*0fca6ea1SDimitry Andric if (!Csrc) 1124*0fca6ea1SDimitry Andric break; 1125*0fca6ea1SDimitry Andric 1126*0fca6ea1SDimitry Andric if (II.isStrictFP()) 1127*0fca6ea1SDimitry Andric break; 1128*0fca6ea1SDimitry Andric 1129*0fca6ea1SDimitry Andric const APFloat &Fsrc = Csrc->getValueAPF(); 1130*0fca6ea1SDimitry Andric if (Fsrc.isNaN()) { 1131*0fca6ea1SDimitry Andric auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet()); 1132*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, Quieted); 1133*0fca6ea1SDimitry Andric } 1134*0fca6ea1SDimitry Andric 1135*0fca6ea1SDimitry Andric const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment); 1136*0fca6ea1SDimitry Andric if (!Cseg) 1137*0fca6ea1SDimitry Andric break; 1138*0fca6ea1SDimitry Andric 1139*0fca6ea1SDimitry Andric unsigned Exponent = (Fsrc.bitcastToAPInt().getZExtValue() >> 52) & 0x7ff; 1140*0fca6ea1SDimitry 
Andric unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue(); 1141*0fca6ea1SDimitry Andric unsigned Shift = SegmentVal * 53; 1142*0fca6ea1SDimitry Andric if (Exponent > 1077) 1143*0fca6ea1SDimitry Andric Shift += Exponent - 1077; 1144*0fca6ea1SDimitry Andric 1145*0fca6ea1SDimitry Andric // 2.0/PI table. 1146*0fca6ea1SDimitry Andric static const uint32_t TwoByPi[] = { 1147*0fca6ea1SDimitry Andric 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041, 1148*0fca6ea1SDimitry Andric 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 1149*0fca6ea1SDimitry Andric 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41, 1150*0fca6ea1SDimitry Andric 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f, 1151*0fca6ea1SDimitry Andric 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d, 1152*0fca6ea1SDimitry Andric 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 1153*0fca6ea1SDimitry Andric 0x56033046}; 1154*0fca6ea1SDimitry Andric 1155*0fca6ea1SDimitry Andric // Return 0 for outbound segment (hardware behavior). 
1156*0fca6ea1SDimitry Andric unsigned Idx = Shift >> 5; 1157*0fca6ea1SDimitry Andric if (Idx + 2 >= std::size(TwoByPi)) { 1158*0fca6ea1SDimitry Andric APFloat Zero = APFloat::getZero(II.getType()->getFltSemantics()); 1159*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::get(II.getType(), Zero)); 1160*0fca6ea1SDimitry Andric } 1161*0fca6ea1SDimitry Andric 1162*0fca6ea1SDimitry Andric unsigned BShift = Shift & 0x1f; 1163*0fca6ea1SDimitry Andric uint64_t Thi = Make_64(TwoByPi[Idx], TwoByPi[Idx + 1]); 1164*0fca6ea1SDimitry Andric uint64_t Tlo = Make_64(TwoByPi[Idx + 2], 0); 1165*0fca6ea1SDimitry Andric if (BShift) 1166*0fca6ea1SDimitry Andric Thi = (Thi << BShift) | (Tlo >> (64 - BShift)); 1167*0fca6ea1SDimitry Andric Thi = Thi >> 11; 1168*0fca6ea1SDimitry Andric APFloat Result = APFloat((double)Thi); 1169*0fca6ea1SDimitry Andric 1170*0fca6ea1SDimitry Andric int Scale = -53 - Shift; 1171*0fca6ea1SDimitry Andric if (Exponent >= 1968) 1172*0fca6ea1SDimitry Andric Scale += 128; 1173*0fca6ea1SDimitry Andric 1174*0fca6ea1SDimitry Andric Result = scalbn(Result, Scale, RoundingMode::NearestTiesToEven); 1175*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Result)); 1176*0fca6ea1SDimitry Andric } 1177e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fmul_legacy: { 1178e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 1179e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 1180e8d8bef9SDimitry Andric 1181e8d8bef9SDimitry Andric // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or 1182e8d8bef9SDimitry Andric // infinity, gives +0.0. 1183e8d8bef9SDimitry Andric // TODO: Move to InstSimplify? 
1184e8d8bef9SDimitry Andric if (match(Op0, PatternMatch::m_AnyZeroFP()) || 1185e8d8bef9SDimitry Andric match(Op1, PatternMatch::m_AnyZeroFP())) 118606c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType())); 1187e8d8bef9SDimitry Andric 1188e8d8bef9SDimitry Andric // If we can prove we don't have one of the special cases then we can use a 1189e8d8bef9SDimitry Andric // normal fmul instruction instead. 119006c3fb27SDimitry Andric if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) { 1191e8d8bef9SDimitry Andric auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II); 1192e8d8bef9SDimitry Andric FMul->takeName(&II); 1193e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, FMul); 1194e8d8bef9SDimitry Andric } 1195e8d8bef9SDimitry Andric break; 1196e8d8bef9SDimitry Andric } 1197e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fma_legacy: { 1198e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 1199e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 1200e8d8bef9SDimitry Andric Value *Op2 = II.getArgOperand(2); 1201e8d8bef9SDimitry Andric 1202e8d8bef9SDimitry Andric // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or 1203e8d8bef9SDimitry Andric // infinity, gives +0.0. 1204e8d8bef9SDimitry Andric // TODO: Move to InstSimplify? 1205e8d8bef9SDimitry Andric if (match(Op0, PatternMatch::m_AnyZeroFP()) || 1206e8d8bef9SDimitry Andric match(Op1, PatternMatch::m_AnyZeroFP())) { 1207e8d8bef9SDimitry Andric // It's tempting to just return Op2 here, but that would give the wrong 1208e8d8bef9SDimitry Andric // result if Op2 was -0.0. 
120906c3fb27SDimitry Andric auto *Zero = ConstantFP::getZero(II.getType()); 1210e8d8bef9SDimitry Andric auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II); 1211e8d8bef9SDimitry Andric FAdd->takeName(&II); 1212e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, FAdd); 1213e8d8bef9SDimitry Andric } 1214e8d8bef9SDimitry Andric 1215e8d8bef9SDimitry Andric // If we can prove we don't have one of the special cases then we can use a 1216e8d8bef9SDimitry Andric // normal fma instead. 121706c3fb27SDimitry Andric if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) { 1218e8d8bef9SDimitry Andric II.setCalledOperand(Intrinsic::getDeclaration( 1219e8d8bef9SDimitry Andric II.getModule(), Intrinsic::fma, II.getType())); 1220e8d8bef9SDimitry Andric return &II; 1221e8d8bef9SDimitry Andric } 1222e8d8bef9SDimitry Andric break; 1223e8d8bef9SDimitry Andric } 12240eae32dcSDimitry Andric case Intrinsic::amdgcn_is_shared: 12250eae32dcSDimitry Andric case Intrinsic::amdgcn_is_private: { 12260eae32dcSDimitry Andric if (isa<UndefValue>(II.getArgOperand(0))) 12270eae32dcSDimitry Andric return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); 12280eae32dcSDimitry Andric 12290eae32dcSDimitry Andric if (isa<ConstantPointerNull>(II.getArgOperand(0))) 12300eae32dcSDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType())); 12310eae32dcSDimitry Andric break; 12320eae32dcSDimitry Andric } 123306c3fb27SDimitry Andric case Intrinsic::amdgcn_raw_buffer_store_format: 123406c3fb27SDimitry Andric case Intrinsic::amdgcn_struct_buffer_store_format: 123506c3fb27SDimitry Andric case Intrinsic::amdgcn_raw_tbuffer_store: 123606c3fb27SDimitry Andric case Intrinsic::amdgcn_struct_tbuffer_store: 123706c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_1d: 123806c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_1darray: 123906c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_2d: 124006c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_2darray: 
124106c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_2darraymsaa: 124206c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_2dmsaa: 124306c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_3d: 124406c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_cube: 124506c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_mip_1d: 124606c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_mip_1darray: 124706c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_mip_2d: 124806c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_mip_2darray: 124906c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_mip_3d: 125006c3fb27SDimitry Andric case Intrinsic::amdgcn_image_store_mip_cube: { 125106c3fb27SDimitry Andric if (!isa<FixedVectorType>(II.getArgOperand(0)->getType())) 125206c3fb27SDimitry Andric break; 125306c3fb27SDimitry Andric 12547a6dacacSDimitry Andric APInt DemandedElts; 12557a6dacacSDimitry Andric if (ST->hasDefaultComponentBroadcast()) 12567a6dacacSDimitry Andric DemandedElts = defaultComponentBroadcast(II.getArgOperand(0)); 12577a6dacacSDimitry Andric else if (ST->hasDefaultComponentZero()) 12587a6dacacSDimitry Andric DemandedElts = trimTrailingZerosInVector(IC, II.getArgOperand(0), &II); 12597a6dacacSDimitry Andric else 12607a6dacacSDimitry Andric break; 126106c3fb27SDimitry Andric 126206c3fb27SDimitry Andric int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 
1 : -1; 126306c3fb27SDimitry Andric if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx, 126406c3fb27SDimitry Andric false)) { 126506c3fb27SDimitry Andric return IC.eraseInstFromFunction(II); 126606c3fb27SDimitry Andric } 126706c3fb27SDimitry Andric 126806c3fb27SDimitry Andric break; 126906c3fb27SDimitry Andric } 127006c3fb27SDimitry Andric } 1271e8d8bef9SDimitry Andric if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = 1272e8d8bef9SDimitry Andric AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) { 1273e8d8bef9SDimitry Andric return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC); 1274e8d8bef9SDimitry Andric } 1275bdd1243dSDimitry Andric return std::nullopt; 1276e8d8bef9SDimitry Andric } 1277e8d8bef9SDimitry Andric 1278e8d8bef9SDimitry Andric /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics. 1279e8d8bef9SDimitry Andric /// 128006c3fb27SDimitry Andric /// The result of simplifying amdgcn image and buffer store intrinsics is updating 128106c3fb27SDimitry Andric /// definitions of the intrinsics vector argument, not Uses of the result like 128206c3fb27SDimitry Andric /// image and buffer loads. 1283e8d8bef9SDimitry Andric /// Note: This only supports non-TFE/LWE image intrinsic calls; those have 1284e8d8bef9SDimitry Andric /// struct returns. 1285e8d8bef9SDimitry Andric static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, 1286e8d8bef9SDimitry Andric IntrinsicInst &II, 1287e8d8bef9SDimitry Andric APInt DemandedElts, 128806c3fb27SDimitry Andric int DMaskIdx, bool IsLoad) { 1289e8d8bef9SDimitry Andric 129006c3fb27SDimitry Andric auto *IIVTy = cast<FixedVectorType>(IsLoad ? 
II.getType() 129106c3fb27SDimitry Andric : II.getOperand(0)->getType()); 1292e8d8bef9SDimitry Andric unsigned VWidth = IIVTy->getNumElements(); 1293e8d8bef9SDimitry Andric if (VWidth == 1) 1294e8d8bef9SDimitry Andric return nullptr; 1295bdd1243dSDimitry Andric Type *EltTy = IIVTy->getElementType(); 1296e8d8bef9SDimitry Andric 1297e8d8bef9SDimitry Andric IRBuilderBase::InsertPointGuard Guard(IC.Builder); 1298e8d8bef9SDimitry Andric IC.Builder.SetInsertPoint(&II); 1299e8d8bef9SDimitry Andric 1300e8d8bef9SDimitry Andric // Assume the arguments are unchanged and later override them, if needed. 1301e8d8bef9SDimitry Andric SmallVector<Value *, 16> Args(II.args()); 1302e8d8bef9SDimitry Andric 1303e8d8bef9SDimitry Andric if (DMaskIdx < 0) { 1304e8d8bef9SDimitry Andric // Buffer case. 1305e8d8bef9SDimitry Andric 1306e8d8bef9SDimitry Andric const unsigned ActiveBits = DemandedElts.getActiveBits(); 130706c3fb27SDimitry Andric const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero(); 1308e8d8bef9SDimitry Andric 1309e8d8bef9SDimitry Andric // Start assuming the prefix of elements is demanded, but possibly clear 1310e8d8bef9SDimitry Andric // some other bits if there are trailing zeros (unused components at front) 1311e8d8bef9SDimitry Andric // and update offset. 
1312e8d8bef9SDimitry Andric DemandedElts = (1 << ActiveBits) - 1; 1313e8d8bef9SDimitry Andric 1314e8d8bef9SDimitry Andric if (UnusedComponentsAtFront > 0) { 1315e8d8bef9SDimitry Andric static const unsigned InvalidOffsetIdx = 0xf; 1316e8d8bef9SDimitry Andric 1317e8d8bef9SDimitry Andric unsigned OffsetIdx; 1318e8d8bef9SDimitry Andric switch (II.getIntrinsicID()) { 1319e8d8bef9SDimitry Andric case Intrinsic::amdgcn_raw_buffer_load: 132006c3fb27SDimitry Andric case Intrinsic::amdgcn_raw_ptr_buffer_load: 1321e8d8bef9SDimitry Andric OffsetIdx = 1; 1322e8d8bef9SDimitry Andric break; 1323e8d8bef9SDimitry Andric case Intrinsic::amdgcn_s_buffer_load: 1324e8d8bef9SDimitry Andric // If resulting type is vec3, there is no point in trimming the 1325e8d8bef9SDimitry Andric // load with updated offset, as the vec3 would most likely be widened to 1326e8d8bef9SDimitry Andric // vec4 anyway during lowering. 1327e8d8bef9SDimitry Andric if (ActiveBits == 4 && UnusedComponentsAtFront == 1) 1328e8d8bef9SDimitry Andric OffsetIdx = InvalidOffsetIdx; 1329e8d8bef9SDimitry Andric else 1330e8d8bef9SDimitry Andric OffsetIdx = 1; 1331e8d8bef9SDimitry Andric break; 1332e8d8bef9SDimitry Andric case Intrinsic::amdgcn_struct_buffer_load: 133306c3fb27SDimitry Andric case Intrinsic::amdgcn_struct_ptr_buffer_load: 1334e8d8bef9SDimitry Andric OffsetIdx = 2; 1335e8d8bef9SDimitry Andric break; 1336e8d8bef9SDimitry Andric default: 1337e8d8bef9SDimitry Andric // TODO: handle tbuffer* intrinsics. 1338e8d8bef9SDimitry Andric OffsetIdx = InvalidOffsetIdx; 1339e8d8bef9SDimitry Andric break; 1340e8d8bef9SDimitry Andric } 1341e8d8bef9SDimitry Andric 1342e8d8bef9SDimitry Andric if (OffsetIdx != InvalidOffsetIdx) { 1343e8d8bef9SDimitry Andric // Clear demanded bits and update the offset. 
1344e8d8bef9SDimitry Andric DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1); 1345bdd1243dSDimitry Andric auto *Offset = Args[OffsetIdx]; 1346e8d8bef9SDimitry Andric unsigned SingleComponentSizeInBits = 1347bdd1243dSDimitry Andric IC.getDataLayout().getTypeSizeInBits(EltTy); 1348e8d8bef9SDimitry Andric unsigned OffsetAdd = 1349e8d8bef9SDimitry Andric UnusedComponentsAtFront * SingleComponentSizeInBits / 8; 1350e8d8bef9SDimitry Andric auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd); 1351e8d8bef9SDimitry Andric Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal); 1352e8d8bef9SDimitry Andric } 1353e8d8bef9SDimitry Andric } 1354e8d8bef9SDimitry Andric } else { 1355e8d8bef9SDimitry Andric // Image case. 1356e8d8bef9SDimitry Andric 1357bdd1243dSDimitry Andric ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]); 1358e8d8bef9SDimitry Andric unsigned DMaskVal = DMask->getZExtValue() & 0xf; 1359e8d8bef9SDimitry Andric 1360cb14a3feSDimitry Andric // dmask 0 has special semantics, do not simplify. 
1361cb14a3feSDimitry Andric if (DMaskVal == 0) 1362cb14a3feSDimitry Andric return nullptr; 1363cb14a3feSDimitry Andric 1364e8d8bef9SDimitry Andric // Mask off values that are undefined because the dmask doesn't cover them 1365bdd1243dSDimitry Andric DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1; 1366e8d8bef9SDimitry Andric 1367e8d8bef9SDimitry Andric unsigned NewDMaskVal = 0; 136806c3fb27SDimitry Andric unsigned OrigLdStIdx = 0; 1369e8d8bef9SDimitry Andric for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) { 1370e8d8bef9SDimitry Andric const unsigned Bit = 1 << SrcIdx; 1371e8d8bef9SDimitry Andric if (!!(DMaskVal & Bit)) { 137206c3fb27SDimitry Andric if (!!DemandedElts[OrigLdStIdx]) 1373e8d8bef9SDimitry Andric NewDMaskVal |= Bit; 137406c3fb27SDimitry Andric OrigLdStIdx++; 1375e8d8bef9SDimitry Andric } 1376e8d8bef9SDimitry Andric } 1377e8d8bef9SDimitry Andric 1378e8d8bef9SDimitry Andric if (DMaskVal != NewDMaskVal) 1379e8d8bef9SDimitry Andric Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal); 1380e8d8bef9SDimitry Andric } 1381e8d8bef9SDimitry Andric 138206c3fb27SDimitry Andric unsigned NewNumElts = DemandedElts.popcount(); 1383e8d8bef9SDimitry Andric if (!NewNumElts) 1384cb14a3feSDimitry Andric return PoisonValue::get(IIVTy); 1385e8d8bef9SDimitry Andric 1386e8d8bef9SDimitry Andric if (NewNumElts >= VWidth && DemandedElts.isMask()) { 1387e8d8bef9SDimitry Andric if (DMaskIdx >= 0) 1388e8d8bef9SDimitry Andric II.setArgOperand(DMaskIdx, Args[DMaskIdx]); 1389e8d8bef9SDimitry Andric return nullptr; 1390e8d8bef9SDimitry Andric } 1391e8d8bef9SDimitry Andric 1392e8d8bef9SDimitry Andric // Validate function argument and return types, extracting overloaded types 1393e8d8bef9SDimitry Andric // along the way. 
1394e8d8bef9SDimitry Andric SmallVector<Type *, 6> OverloadTys; 1395e8d8bef9SDimitry Andric if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys)) 1396e8d8bef9SDimitry Andric return nullptr; 1397e8d8bef9SDimitry Andric 1398e8d8bef9SDimitry Andric Type *NewTy = 1399e8d8bef9SDimitry Andric (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts); 1400e8d8bef9SDimitry Andric OverloadTys[0] = NewTy; 1401e8d8bef9SDimitry Andric 140206c3fb27SDimitry Andric if (!IsLoad) { 140306c3fb27SDimitry Andric SmallVector<int, 8> EltMask; 140406c3fb27SDimitry Andric for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx) 140506c3fb27SDimitry Andric if (DemandedElts[OrigStoreIdx]) 140606c3fb27SDimitry Andric EltMask.push_back(OrigStoreIdx); 140706c3fb27SDimitry Andric 140806c3fb27SDimitry Andric if (NewNumElts == 1) 140906c3fb27SDimitry Andric Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]); 141006c3fb27SDimitry Andric else 141106c3fb27SDimitry Andric Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask); 141206c3fb27SDimitry Andric } 141306c3fb27SDimitry Andric 1414bdd1243dSDimitry Andric Function *NewIntrin = Intrinsic::getDeclaration( 1415bdd1243dSDimitry Andric II.getModule(), II.getIntrinsicID(), OverloadTys); 1416e8d8bef9SDimitry Andric CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args); 1417e8d8bef9SDimitry Andric NewCall->takeName(&II); 1418e8d8bef9SDimitry Andric NewCall->copyMetadata(II); 1419e8d8bef9SDimitry Andric 142006c3fb27SDimitry Andric if (IsLoad) { 1421e8d8bef9SDimitry Andric if (NewNumElts == 1) { 1422cb14a3feSDimitry Andric return IC.Builder.CreateInsertElement(PoisonValue::get(IIVTy), NewCall, 142306c3fb27SDimitry Andric DemandedElts.countr_zero()); 1424e8d8bef9SDimitry Andric } 1425e8d8bef9SDimitry Andric 1426e8d8bef9SDimitry Andric SmallVector<int, 8> EltMask; 1427e8d8bef9SDimitry Andric unsigned NewLoadIdx = 0; 1428e8d8bef9SDimitry Andric for (unsigned OrigLoadIdx = 
0; OrigLoadIdx < VWidth; ++OrigLoadIdx) { 1429e8d8bef9SDimitry Andric if (!!DemandedElts[OrigLoadIdx]) 1430e8d8bef9SDimitry Andric EltMask.push_back(NewLoadIdx++); 1431e8d8bef9SDimitry Andric else 1432e8d8bef9SDimitry Andric EltMask.push_back(NewNumElts); 1433e8d8bef9SDimitry Andric } 1434e8d8bef9SDimitry Andric 143506c3fb27SDimitry Andric auto *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask); 1436e8d8bef9SDimitry Andric 1437e8d8bef9SDimitry Andric return Shuffle; 1438e8d8bef9SDimitry Andric } 1439e8d8bef9SDimitry Andric 144006c3fb27SDimitry Andric return NewCall; 144106c3fb27SDimitry Andric } 144206c3fb27SDimitry Andric 1443bdd1243dSDimitry Andric std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic( 1444e8d8bef9SDimitry Andric InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 1445e8d8bef9SDimitry Andric APInt &UndefElts2, APInt &UndefElts3, 1446e8d8bef9SDimitry Andric std::function<void(Instruction *, unsigned, APInt, APInt &)> 1447e8d8bef9SDimitry Andric SimplifyAndSetOp) const { 1448e8d8bef9SDimitry Andric switch (II.getIntrinsicID()) { 1449e8d8bef9SDimitry Andric case Intrinsic::amdgcn_raw_buffer_load: 145006c3fb27SDimitry Andric case Intrinsic::amdgcn_raw_ptr_buffer_load: 1451e8d8bef9SDimitry Andric case Intrinsic::amdgcn_raw_buffer_load_format: 145206c3fb27SDimitry Andric case Intrinsic::amdgcn_raw_ptr_buffer_load_format: 1453e8d8bef9SDimitry Andric case Intrinsic::amdgcn_raw_tbuffer_load: 145406c3fb27SDimitry Andric case Intrinsic::amdgcn_raw_ptr_tbuffer_load: 1455e8d8bef9SDimitry Andric case Intrinsic::amdgcn_s_buffer_load: 1456e8d8bef9SDimitry Andric case Intrinsic::amdgcn_struct_buffer_load: 145706c3fb27SDimitry Andric case Intrinsic::amdgcn_struct_ptr_buffer_load: 1458e8d8bef9SDimitry Andric case Intrinsic::amdgcn_struct_buffer_load_format: 145906c3fb27SDimitry Andric case Intrinsic::amdgcn_struct_ptr_buffer_load_format: 1460e8d8bef9SDimitry Andric case Intrinsic::amdgcn_struct_tbuffer_load: 
146106c3fb27SDimitry Andric case Intrinsic::amdgcn_struct_ptr_tbuffer_load: 1462e8d8bef9SDimitry Andric return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts); 1463e8d8bef9SDimitry Andric default: { 1464e8d8bef9SDimitry Andric if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) { 1465e8d8bef9SDimitry Andric return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0); 1466e8d8bef9SDimitry Andric } 1467e8d8bef9SDimitry Andric break; 1468e8d8bef9SDimitry Andric } 1469e8d8bef9SDimitry Andric } 1470bdd1243dSDimitry Andric return std::nullopt; 1471e8d8bef9SDimitry Andric } 1472