1349cc55cSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===// 2349cc55cSDimitry Andric // 3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6349cc55cSDimitry Andric // 7349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 8349cc55cSDimitry Andric 9349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h" 10349cc55cSDimitry Andric #include "GCNSubtarget.h" 11349cc55cSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 125f757f3fSDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 13349cc55cSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 14349cc55cSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 15349cc55cSDimitry Andric #include "llvm/Target/TargetMachine.h" 16349cc55cSDimitry Andric 17349cc55cSDimitry Andric using namespace llvm; 18349cc55cSDimitry Andric using namespace MIPatternMatch; 19349cc55cSDimitry Andric 20349cc55cSDimitry Andric LLVM_READNONE 21349cc55cSDimitry Andric static bool fnegFoldsIntoMI(const MachineInstr &MI) { 22349cc55cSDimitry Andric switch (MI.getOpcode()) { 23349cc55cSDimitry Andric case AMDGPU::G_FADD: 24349cc55cSDimitry Andric case AMDGPU::G_FSUB: 25349cc55cSDimitry Andric case AMDGPU::G_FMUL: 26349cc55cSDimitry Andric case AMDGPU::G_FMA: 27349cc55cSDimitry Andric case AMDGPU::G_FMAD: 28349cc55cSDimitry Andric case AMDGPU::G_FMINNUM: 29349cc55cSDimitry Andric case AMDGPU::G_FMAXNUM: 30349cc55cSDimitry Andric case AMDGPU::G_FMINNUM_IEEE: 31349cc55cSDimitry Andric case AMDGPU::G_FMAXNUM_IEEE: 325f757f3fSDimitry Andric case AMDGPU::G_FMINIMUM: 335f757f3fSDimitry Andric case AMDGPU::G_FMAXIMUM: 34349cc55cSDimitry Andric case AMDGPU::G_FSIN: 35349cc55cSDimitry Andric case AMDGPU::G_FPEXT: 36349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_TRUNC: 37349cc55cSDimitry Andric case AMDGPU::G_FPTRUNC: 38349cc55cSDimitry Andric case AMDGPU::G_FRINT: 39349cc55cSDimitry Andric case AMDGPU::G_FNEARBYINT: 40349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_ROUND: 41349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_ROUNDEVEN: 42349cc55cSDimitry Andric case AMDGPU::G_FCANONICALIZE: 43349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_RCP_IFLAG: 44349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_FMIN_LEGACY: 45349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_FMAX_LEGACY: 46349cc55cSDimitry Andric return true; 47349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC: { 48*0fca6ea1SDimitry Andric Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID(); 49349cc55cSDimitry Andric switch (IntrinsicID) { 50349cc55cSDimitry Andric case Intrinsic::amdgcn_rcp: 51349cc55cSDimitry Andric case Intrinsic::amdgcn_rcp_legacy: 52349cc55cSDimitry Andric case Intrinsic::amdgcn_sin: 53349cc55cSDimitry Andric case Intrinsic::amdgcn_fmul_legacy: 54349cc55cSDimitry Andric case Intrinsic::amdgcn_fmed3: 55349cc55cSDimitry Andric case Intrinsic::amdgcn_fma_legacy: 56349cc55cSDimitry Andric return true; 57349cc55cSDimitry Andric default: 58349cc55cSDimitry Andric return false; 59349cc55cSDimitry Andric } 60349cc55cSDimitry Andric } 61349cc55cSDimitry Andric default: 62349cc55cSDimitry Andric return false; 63349cc55cSDimitry Andric } 64349cc55cSDimitry Andric } 65349cc55cSDimitry Andric 66349cc55cSDimitry Andric /// \p returns true if the operation will definitely need to use a 64-bit 67349cc55cSDimitry Andric /// encoding, and thus will use a VOP3 encoding regardless of the source 68349cc55cSDimitry Andric /// modifiers. 69349cc55cSDimitry Andric LLVM_READONLY 70349cc55cSDimitry Andric static bool opMustUseVOP3Encoding(const MachineInstr &MI, 71349cc55cSDimitry Andric const MachineRegisterInfo &MRI) { 725f757f3fSDimitry Andric return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) || 73349cc55cSDimitry Andric MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64; 74349cc55cSDimitry Andric } 75349cc55cSDimitry Andric 76349cc55cSDimitry Andric // Most FP instructions support source modifiers. 77349cc55cSDimitry Andric LLVM_READONLY 78349cc55cSDimitry Andric static bool hasSourceMods(const MachineInstr &MI) { 79349cc55cSDimitry Andric if (!MI.memoperands().empty()) 80349cc55cSDimitry Andric return false; 81349cc55cSDimitry Andric 82349cc55cSDimitry Andric switch (MI.getOpcode()) { 83349cc55cSDimitry Andric case AMDGPU::COPY: 84349cc55cSDimitry Andric case AMDGPU::G_SELECT: 85349cc55cSDimitry Andric case AMDGPU::G_FDIV: 86349cc55cSDimitry Andric case AMDGPU::G_FREM: 87349cc55cSDimitry Andric case TargetOpcode::INLINEASM: 88349cc55cSDimitry Andric case TargetOpcode::INLINEASM_BR: 89349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: 905f757f3fSDimitry Andric case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: 91349cc55cSDimitry Andric case AMDGPU::G_BITCAST: 92349cc55cSDimitry Andric case AMDGPU::G_ANYEXT: 93349cc55cSDimitry Andric case AMDGPU::G_BUILD_VECTOR: 94349cc55cSDimitry Andric case AMDGPU::G_BUILD_VECTOR_TRUNC: 95349cc55cSDimitry Andric case AMDGPU::G_PHI: 96349cc55cSDimitry Andric return false; 975f757f3fSDimitry Andric case AMDGPU::G_INTRINSIC: 985f757f3fSDimitry Andric case AMDGPU::G_INTRINSIC_CONVERGENT: { 99*0fca6ea1SDimitry Andric Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID(); 100349cc55cSDimitry Andric switch (IntrinsicID) { 101349cc55cSDimitry Andric case Intrinsic::amdgcn_interp_p1: 102349cc55cSDimitry Andric case Intrinsic::amdgcn_interp_p2: 103349cc55cSDimitry Andric case Intrinsic::amdgcn_interp_mov: 104349cc55cSDimitry Andric case Intrinsic::amdgcn_interp_p1_f16: 105349cc55cSDimitry Andric case Intrinsic::amdgcn_interp_p2_f16: 106349cc55cSDimitry Andric case Intrinsic::amdgcn_div_scale: 107349cc55cSDimitry Andric return false; 108349cc55cSDimitry Andric default: 109349cc55cSDimitry Andric return true; 110349cc55cSDimitry Andric } 111349cc55cSDimitry Andric } 112349cc55cSDimitry Andric default: 113349cc55cSDimitry Andric return true; 114349cc55cSDimitry Andric } 115349cc55cSDimitry Andric } 116349cc55cSDimitry Andric 117349cc55cSDimitry Andric static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI, 118349cc55cSDimitry Andric unsigned CostThreshold = 4) { 119349cc55cSDimitry Andric // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus 120349cc55cSDimitry Andric // it is truly free to use a source modifier in all cases. If there are 121349cc55cSDimitry Andric // multiple users but for each one will necessitate using VOP3, there will be 122349cc55cSDimitry Andric // a code size increase. Try to avoid increasing code size unless we know it 123349cc55cSDimitry Andric // will save on the instruction count. 124349cc55cSDimitry Andric unsigned NumMayIncreaseSize = 0; 125349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 126349cc55cSDimitry Andric for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) { 127349cc55cSDimitry Andric if (!hasSourceMods(Use)) 128349cc55cSDimitry Andric return false; 129349cc55cSDimitry Andric 130349cc55cSDimitry Andric if (!opMustUseVOP3Encoding(Use, MRI)) { 131349cc55cSDimitry Andric if (++NumMayIncreaseSize > CostThreshold) 132349cc55cSDimitry Andric return false; 133349cc55cSDimitry Andric } 134349cc55cSDimitry Andric } 135349cc55cSDimitry Andric return true; 136349cc55cSDimitry Andric } 137349cc55cSDimitry Andric 138349cc55cSDimitry Andric static bool mayIgnoreSignedZero(MachineInstr &MI) { 139349cc55cSDimitry Andric const TargetOptions &Options = MI.getMF()->getTarget().Options; 140349cc55cSDimitry Andric return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz); 141349cc55cSDimitry Andric } 142349cc55cSDimitry Andric 143349cc55cSDimitry Andric static bool isInv2Pi(const APFloat &APF) { 144349cc55cSDimitry Andric static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118)); 145349cc55cSDimitry Andric static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983)); 146349cc55cSDimitry Andric static const APFloat KF64(APFloat::IEEEdouble(), 147349cc55cSDimitry Andric APInt(64, 0x3fc45f306dc9c882)); 148349cc55cSDimitry Andric 149349cc55cSDimitry Andric return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) || 150349cc55cSDimitry Andric APF.bitwiseIsEqual(KF64); 151349cc55cSDimitry Andric } 152349cc55cSDimitry Andric 153349cc55cSDimitry Andric // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an 154349cc55cSDimitry Andric // additional cost to negate them. 155349cc55cSDimitry Andric static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg, 156349cc55cSDimitry Andric MachineRegisterInfo &MRI) { 157bdd1243dSDimitry Andric std::optional<FPValueAndVReg> FPValReg; 158349cc55cSDimitry Andric if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) { 159349cc55cSDimitry Andric if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative()) 160349cc55cSDimitry Andric return true; 161349cc55cSDimitry Andric 162349cc55cSDimitry Andric const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>(); 163349cc55cSDimitry Andric if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value)) 164349cc55cSDimitry Andric return true; 165349cc55cSDimitry Andric } 166349cc55cSDimitry Andric return false; 167349cc55cSDimitry Andric } 168349cc55cSDimitry Andric 169349cc55cSDimitry Andric static unsigned inverseMinMax(unsigned Opc) { 170349cc55cSDimitry Andric switch (Opc) { 171349cc55cSDimitry Andric case AMDGPU::G_FMAXNUM: 172349cc55cSDimitry Andric return AMDGPU::G_FMINNUM; 173349cc55cSDimitry Andric case AMDGPU::G_FMINNUM: 174349cc55cSDimitry Andric return AMDGPU::G_FMAXNUM; 175349cc55cSDimitry Andric case AMDGPU::G_FMAXNUM_IEEE: 176349cc55cSDimitry Andric return AMDGPU::G_FMINNUM_IEEE; 177349cc55cSDimitry Andric case AMDGPU::G_FMINNUM_IEEE: 178349cc55cSDimitry Andric return AMDGPU::G_FMAXNUM_IEEE; 1795f757f3fSDimitry Andric case AMDGPU::G_FMAXIMUM: 1805f757f3fSDimitry Andric return AMDGPU::G_FMINIMUM; 1815f757f3fSDimitry Andric case AMDGPU::G_FMINIMUM: 1825f757f3fSDimitry Andric return AMDGPU::G_FMAXIMUM; 183349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_FMAX_LEGACY: 184349cc55cSDimitry Andric return AMDGPU::G_AMDGPU_FMIN_LEGACY; 185349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_FMIN_LEGACY: 186349cc55cSDimitry Andric return AMDGPU::G_AMDGPU_FMAX_LEGACY; 187349cc55cSDimitry Andric default: 188349cc55cSDimitry Andric llvm_unreachable("invalid min/max opcode"); 189349cc55cSDimitry Andric } 190349cc55cSDimitry Andric } 191349cc55cSDimitry Andric 192349cc55cSDimitry Andric bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI, 193349cc55cSDimitry Andric MachineInstr *&MatchInfo) { 194349cc55cSDimitry Andric Register Src = MI.getOperand(1).getReg(); 195349cc55cSDimitry Andric MatchInfo = MRI.getVRegDef(Src); 196349cc55cSDimitry Andric 197349cc55cSDimitry Andric // If the input has multiple uses and we can either fold the negate down, or 198349cc55cSDimitry Andric // the other uses cannot, give up. This both prevents unprofitable 199349cc55cSDimitry Andric // transformations and infinite loops: we won't repeatedly try to fold around 200349cc55cSDimitry Andric // a negate that has no 'good' form. 201349cc55cSDimitry Andric if (MRI.hasOneNonDBGUse(Src)) { 202349cc55cSDimitry Andric if (allUsesHaveSourceMods(MI, MRI, 0)) 203349cc55cSDimitry Andric return false; 204349cc55cSDimitry Andric } else { 205349cc55cSDimitry Andric if (fnegFoldsIntoMI(*MatchInfo) && 206349cc55cSDimitry Andric (allUsesHaveSourceMods(MI, MRI) || 207349cc55cSDimitry Andric !allUsesHaveSourceMods(*MatchInfo, MRI))) 208349cc55cSDimitry Andric return false; 209349cc55cSDimitry Andric } 210349cc55cSDimitry Andric 211349cc55cSDimitry Andric switch (MatchInfo->getOpcode()) { 212349cc55cSDimitry Andric case AMDGPU::G_FMINNUM: 213349cc55cSDimitry Andric case AMDGPU::G_FMAXNUM: 214349cc55cSDimitry Andric case AMDGPU::G_FMINNUM_IEEE: 215349cc55cSDimitry Andric case AMDGPU::G_FMAXNUM_IEEE: 2165f757f3fSDimitry Andric case AMDGPU::G_FMINIMUM: 2175f757f3fSDimitry Andric case AMDGPU::G_FMAXIMUM: 218349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_FMIN_LEGACY: 219349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_FMAX_LEGACY: 220349cc55cSDimitry Andric // 0 doesn't have a negated inline immediate. 221349cc55cSDimitry Andric return !isConstantCostlierToNegate(*MatchInfo, 222349cc55cSDimitry Andric MatchInfo->getOperand(2).getReg(), MRI); 223349cc55cSDimitry Andric case AMDGPU::G_FADD: 224349cc55cSDimitry Andric case AMDGPU::G_FSUB: 225349cc55cSDimitry Andric case AMDGPU::G_FMA: 226349cc55cSDimitry Andric case AMDGPU::G_FMAD: 227349cc55cSDimitry Andric return mayIgnoreSignedZero(*MatchInfo); 228349cc55cSDimitry Andric case AMDGPU::G_FMUL: 229349cc55cSDimitry Andric case AMDGPU::G_FPEXT: 230349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_TRUNC: 231349cc55cSDimitry Andric case AMDGPU::G_FPTRUNC: 232349cc55cSDimitry Andric case AMDGPU::G_FRINT: 233349cc55cSDimitry Andric case AMDGPU::G_FNEARBYINT: 234349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_ROUND: 235349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_ROUNDEVEN: 236349cc55cSDimitry Andric case AMDGPU::G_FSIN: 237349cc55cSDimitry Andric case AMDGPU::G_FCANONICALIZE: 238349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_RCP_IFLAG: 239349cc55cSDimitry Andric return true; 2405f757f3fSDimitry Andric case AMDGPU::G_INTRINSIC: 2415f757f3fSDimitry Andric case AMDGPU::G_INTRINSIC_CONVERGENT: { 242*0fca6ea1SDimitry Andric Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID(); 243349cc55cSDimitry Andric switch (IntrinsicID) { 244349cc55cSDimitry Andric case Intrinsic::amdgcn_rcp: 245349cc55cSDimitry Andric case Intrinsic::amdgcn_rcp_legacy: 246349cc55cSDimitry Andric case Intrinsic::amdgcn_sin: 247349cc55cSDimitry Andric case Intrinsic::amdgcn_fmul_legacy: 248349cc55cSDimitry Andric case Intrinsic::amdgcn_fmed3: 249349cc55cSDimitry Andric return true; 250349cc55cSDimitry Andric case Intrinsic::amdgcn_fma_legacy: 251349cc55cSDimitry Andric return mayIgnoreSignedZero(*MatchInfo); 252349cc55cSDimitry Andric default: 253349cc55cSDimitry Andric return false; 254349cc55cSDimitry Andric } 255349cc55cSDimitry Andric } 256349cc55cSDimitry Andric default: 257349cc55cSDimitry Andric return false; 258349cc55cSDimitry Andric } 259349cc55cSDimitry Andric } 260349cc55cSDimitry Andric 261349cc55cSDimitry Andric void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI, 262349cc55cSDimitry Andric MachineInstr *&MatchInfo) { 263349cc55cSDimitry Andric // Transform: 264349cc55cSDimitry Andric // %A = inst %Op1, ... 265349cc55cSDimitry Andric // %B = fneg %A 266349cc55cSDimitry Andric // 267349cc55cSDimitry Andric // into: 268349cc55cSDimitry Andric // 269349cc55cSDimitry Andric // (if %A has one use, specifically fneg above) 270349cc55cSDimitry Andric // %B = inst (maybe fneg %Op1), ... 271349cc55cSDimitry Andric // 272349cc55cSDimitry Andric // (if %A has multiple uses) 273349cc55cSDimitry Andric // %B = inst (maybe fneg %Op1), ... 274349cc55cSDimitry Andric // %A = fneg %B 275349cc55cSDimitry Andric 276349cc55cSDimitry Andric // Replace register in operand with a register holding negated value. 277349cc55cSDimitry Andric auto NegateOperand = [&](MachineOperand &Op) { 278349cc55cSDimitry Andric Register Reg = Op.getReg(); 279349cc55cSDimitry Andric if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg)))) 280349cc55cSDimitry Andric Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0); 281349cc55cSDimitry Andric replaceRegOpWith(MRI, Op, Reg); 282349cc55cSDimitry Andric }; 283349cc55cSDimitry Andric 284349cc55cSDimitry Andric // Replace either register in operands with a register holding negated value. 285349cc55cSDimitry Andric auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) { 286349cc55cSDimitry Andric Register XReg = X.getReg(); 287349cc55cSDimitry Andric Register YReg = Y.getReg(); 288349cc55cSDimitry Andric if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg)))) 289349cc55cSDimitry Andric replaceRegOpWith(MRI, X, XReg); 290349cc55cSDimitry Andric else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg)))) 291349cc55cSDimitry Andric replaceRegOpWith(MRI, Y, YReg); 292349cc55cSDimitry Andric else { 293349cc55cSDimitry Andric YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0); 294349cc55cSDimitry Andric replaceRegOpWith(MRI, Y, YReg); 295349cc55cSDimitry Andric } 296349cc55cSDimitry Andric }; 297349cc55cSDimitry Andric 298349cc55cSDimitry Andric Builder.setInstrAndDebugLoc(*MatchInfo); 299349cc55cSDimitry Andric 300349cc55cSDimitry Andric // Negate appropriate operands so that resulting value of MatchInfo is 301349cc55cSDimitry Andric // negated. 302349cc55cSDimitry Andric switch (MatchInfo->getOpcode()) { 303349cc55cSDimitry Andric case AMDGPU::G_FADD: 304349cc55cSDimitry Andric case AMDGPU::G_FSUB: 305349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(1)); 306349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(2)); 307349cc55cSDimitry Andric break; 308349cc55cSDimitry Andric case AMDGPU::G_FMUL: 309349cc55cSDimitry Andric NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2)); 310349cc55cSDimitry Andric break; 311349cc55cSDimitry Andric case AMDGPU::G_FMINNUM: 312349cc55cSDimitry Andric case AMDGPU::G_FMAXNUM: 313349cc55cSDimitry Andric case AMDGPU::G_FMINNUM_IEEE: 314349cc55cSDimitry Andric case AMDGPU::G_FMAXNUM_IEEE: 3155f757f3fSDimitry Andric case AMDGPU::G_FMINIMUM: 3165f757f3fSDimitry Andric case AMDGPU::G_FMAXIMUM: 317349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_FMIN_LEGACY: 318349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_FMAX_LEGACY: { 319349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(1)); 320349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(2)); 321349cc55cSDimitry Andric unsigned Opposite = inverseMinMax(MatchInfo->getOpcode()); 322349cc55cSDimitry Andric replaceOpcodeWith(*MatchInfo, Opposite); 323349cc55cSDimitry Andric break; 324349cc55cSDimitry Andric } 325349cc55cSDimitry Andric case AMDGPU::G_FMA: 326349cc55cSDimitry Andric case AMDGPU::G_FMAD: 327349cc55cSDimitry Andric NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2)); 328349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(3)); 329349cc55cSDimitry Andric break; 330349cc55cSDimitry Andric case AMDGPU::G_FPEXT: 331349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_TRUNC: 332349cc55cSDimitry Andric case AMDGPU::G_FRINT: 333349cc55cSDimitry Andric case AMDGPU::G_FNEARBYINT: 334349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_ROUND: 335349cc55cSDimitry Andric case AMDGPU::G_INTRINSIC_ROUNDEVEN: 336349cc55cSDimitry Andric case AMDGPU::G_FSIN: 337349cc55cSDimitry Andric case AMDGPU::G_FCANONICALIZE: 338349cc55cSDimitry Andric case AMDGPU::G_AMDGPU_RCP_IFLAG: 339349cc55cSDimitry Andric case AMDGPU::G_FPTRUNC: 340349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(1)); 341349cc55cSDimitry Andric break; 3425f757f3fSDimitry Andric case AMDGPU::G_INTRINSIC: 3435f757f3fSDimitry Andric case AMDGPU::G_INTRINSIC_CONVERGENT: { 344*0fca6ea1SDimitry Andric Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID(); 345349cc55cSDimitry Andric switch (IntrinsicID) { 346349cc55cSDimitry Andric case Intrinsic::amdgcn_rcp: 347349cc55cSDimitry Andric case Intrinsic::amdgcn_rcp_legacy: 348349cc55cSDimitry Andric case Intrinsic::amdgcn_sin: 349349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(2)); 350349cc55cSDimitry Andric break; 351349cc55cSDimitry Andric case Intrinsic::amdgcn_fmul_legacy: 352349cc55cSDimitry Andric NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3)); 353349cc55cSDimitry Andric break; 354349cc55cSDimitry Andric case Intrinsic::amdgcn_fmed3: 355349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(2)); 356349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(3)); 357349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(4)); 358349cc55cSDimitry Andric break; 359349cc55cSDimitry Andric case Intrinsic::amdgcn_fma_legacy: 360349cc55cSDimitry Andric NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3)); 361349cc55cSDimitry Andric NegateOperand(MatchInfo->getOperand(4)); 362349cc55cSDimitry Andric break; 363349cc55cSDimitry Andric default: 364349cc55cSDimitry Andric llvm_unreachable("folding fneg not supported for this intrinsic"); 365349cc55cSDimitry Andric } 366349cc55cSDimitry Andric break; 367349cc55cSDimitry Andric } 368349cc55cSDimitry Andric default: 369349cc55cSDimitry Andric llvm_unreachable("folding fneg not supported for this instruction"); 370349cc55cSDimitry Andric } 371349cc55cSDimitry Andric 372349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 373349cc55cSDimitry Andric Register MatchInfoDst = MatchInfo->getOperand(0).getReg(); 374349cc55cSDimitry Andric 375349cc55cSDimitry Andric if (MRI.hasOneNonDBGUse(MatchInfoDst)) { 376349cc55cSDimitry Andric // MatchInfo now has negated value so use that instead of old Dst. 377349cc55cSDimitry Andric replaceRegWith(MRI, Dst, MatchInfoDst); 378349cc55cSDimitry Andric } else { 379349cc55cSDimitry Andric // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa 380349cc55cSDimitry Andric // but replaceRegWith will replace defs as well. It is easier to replace one 381349cc55cSDimitry Andric // def with a new register. 382349cc55cSDimitry Andric LLT Type = MRI.getType(Dst); 383349cc55cSDimitry Andric Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type); 384349cc55cSDimitry Andric replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo); 385349cc55cSDimitry Andric 386349cc55cSDimitry Andric // MatchInfo now has negated value so use that instead of old Dst. 387349cc55cSDimitry Andric replaceRegWith(MRI, Dst, NegatedMatchInfo); 388349cc55cSDimitry Andric 389349cc55cSDimitry Andric // Recreate non negated value for other uses of old MatchInfoDst 39081ad6265SDimitry Andric auto NextInst = ++MatchInfo->getIterator(); 39181ad6265SDimitry Andric Builder.setInstrAndDebugLoc(*NextInst); 392349cc55cSDimitry Andric Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags()); 393349cc55cSDimitry Andric } 394349cc55cSDimitry Andric 395349cc55cSDimitry Andric MI.eraseFromParent(); 396349cc55cSDimitry Andric } 39706c3fb27SDimitry Andric 39806c3fb27SDimitry Andric // TODO: Should return converted value / extension source and avoid introducing 39906c3fb27SDimitry Andric // intermediate fptruncs in the apply function. 40006c3fb27SDimitry Andric static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI, 40106c3fb27SDimitry Andric Register Reg) { 40206c3fb27SDimitry Andric const MachineInstr *Def = MRI.getVRegDef(Reg); 40306c3fb27SDimitry Andric if (Def->getOpcode() == TargetOpcode::G_FPEXT) { 40406c3fb27SDimitry Andric Register SrcReg = Def->getOperand(1).getReg(); 40506c3fb27SDimitry Andric return MRI.getType(SrcReg) == LLT::scalar(16); 40606c3fb27SDimitry Andric } 40706c3fb27SDimitry Andric 40806c3fb27SDimitry Andric if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) { 40906c3fb27SDimitry Andric APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF(); 41006c3fb27SDimitry Andric bool LosesInfo = true; 41106c3fb27SDimitry Andric Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo); 41206c3fb27SDimitry Andric return !LosesInfo; 41306c3fb27SDimitry Andric } 41406c3fb27SDimitry Andric 41506c3fb27SDimitry Andric return false; 41606c3fb27SDimitry Andric } 41706c3fb27SDimitry Andric 41806c3fb27SDimitry Andric bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI, 41906c3fb27SDimitry Andric Register Src0, 42006c3fb27SDimitry Andric Register Src1, 42106c3fb27SDimitry Andric Register Src2) { 42206c3fb27SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC); 42306c3fb27SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 42406c3fb27SDimitry Andric if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32)) 42506c3fb27SDimitry Andric return false; 42606c3fb27SDimitry Andric 42706c3fb27SDimitry Andric return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) && 42806c3fb27SDimitry Andric isFPExtFromF16OrConst(MRI, Src2); 42906c3fb27SDimitry Andric } 43006c3fb27SDimitry Andric 43106c3fb27SDimitry Andric void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI, 43206c3fb27SDimitry Andric Register Src0, 43306c3fb27SDimitry Andric Register Src1, 43406c3fb27SDimitry Andric Register Src2) { 43506c3fb27SDimitry Andric // We expect fptrunc (fpext x) to fold out, and to constant fold any constant 43606c3fb27SDimitry Andric // sources. 43706c3fb27SDimitry Andric Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0); 43806c3fb27SDimitry Andric Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0); 43906c3fb27SDimitry Andric Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0); 44006c3fb27SDimitry Andric 44106c3fb27SDimitry Andric LLT Ty = MRI.getType(Src0); 44206c3fb27SDimitry Andric auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1); 44306c3fb27SDimitry Andric auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1); 44406c3fb27SDimitry Andric auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2); 44506c3fb27SDimitry Andric Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1); 44606c3fb27SDimitry Andric MI.eraseFromParent(); 44706c3fb27SDimitry Andric } 448