Lines Matching defs:SDValue

1440     const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
1963 SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
1965 SDValue Chain,
1985 SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
1991 SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
1998 SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG,
2006 return SDValue();
2009 SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
2010 const SDLoc &SL, SDValue Val,
2040 SDValue SITargetLowering::lowerKernargMemParameter(
2041 SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
2058 SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
2059 SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, Align(4),
2063 SDValue ShiftAmt = DAG.getConstant(OffsetDiff * 8, SL, MVT::i32);
2064 SDValue Extract = DAG.getNode(ISD::SRL, SL, MVT::i32, Load, ShiftAmt);
2066 SDValue ArgVal = DAG.getNode(ISD::TRUNCATE, SL, IntVT, Extract);
2074 SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
2075 SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Alignment,
2079 SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
2083 SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
2084 const SDLoc &SL, SDValue Chain,
2101 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
2102 SDValue ArgValue;
2132 SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
2795 SDValue SITargetLowering::LowerFormalArguments(
2796 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2798 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2908 SmallVector<SDValue, 16> Chains;
2935 SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, Chain, Offset);
2949 SDValue NewArg;
2965 SDValue Copy = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i32);
2967 SDValue ShiftAmt = DAG.getConstant(OffsetDiff * 8, DL, MVT::i32);
2968 SDValue Extract = DAG.getNode(ISD::SRL, DL, MVT::i32, Copy, ShiftAmt);
2970 SDValue ArgVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Extract);
2983 SDValue Copy;
2997 SmallVector<SDValue, 4> Elts;
3049 SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg);
3069 SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
3158 SDValue
3159 SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3162 const SmallVectorImpl<SDValue> &OutVals,
3188 SDValue Glue;
3189 SmallVector<SDValue, 48> RetOps;
3198 SDValue Arg = OutVals[RealRVLocIdx];
3253 SDValue SITargetLowering::LowerCallResult(
3254 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
3256 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool IsThisReturn,
3257 SDValue ThisVal) const {
3268 SDValue Val;
3314 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
3315 SmallVectorImpl<SDValue> &MemOpChains,
3316 SDValue Chain) const {
3381 SDValue InputReg;
3410 SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
3440 SDValue InputReg;
3459 SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
3468 SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
3501 SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
3524 SDValue Callee, CallingConv::ID CalleeCC, bool IsVarArg,
3526 const SmallVectorImpl<SDValue> &OutVals,
3613 SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
3614 SmallVectorImpl<SDValue> &InVals) const {
3647 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3649 SDValue Chain = CLI.Chain;
3650 SDValue Callee = CLI.Callee;
3699 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3700 SmallVector<SDValue, 8> MemOpChains;
3738 SmallVector<SDValue, 4> CopyFromChains;
3741 SDValue ScratchRSrcReg
3757 SDValue Arg = OutVals[i];
3787 SDValue DstAddr;
3793 SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
3823 SDValue SP = DAG.getCopyFromReg(Chain, DL, Info->getStackPtrOffsetReg(),
3832 SDValue SizeNode =
3834 SDValue Cpy =
3843 SDValue Store =
3855 SDValue InGlue;
3872 std::vector<SDValue> Ops;
3907 if (SDValue Token = CLI.ConvergenceControlToken) {
3908 SmallVector<SDValue, 2> GlueOps;
3913 InGlue = SDValue(DAG.getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, DL,
3942 SDValue Call = DAG.getNode(AMDGPUISD::CALL, DL, NodeTys, Ops);
3954 InVals, /*IsThisReturn=*/false, SDValue());
3959 SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(
3960 SDValue Op, SelectionDAG &DAG) const {
3966 SDValue Tmp1 = Op;
3967 SDValue Tmp2 = Op.getValue(1);
3968 SDValue Tmp3 = Op.getOperand(2);
3969 SDValue Chain = Tmp1.getOperand(0);
3977 SDValue Size = Tmp2.getOperand(1);
3978 SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
3986 SDValue ScaledSize = DAG.getNode(
4000 Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
4005 SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4012 SDValue Size = Op.getOperand(1);
4019 SDValue SITargetLowering::LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const {
4026 SDValue CopyFromSP = DAG.getCopyFromReg(Op->getOperand(0), SL, SP, MVT::i32);
4031 SDValue VectorAddress =
4036 SDValue SITargetLowering::lowerGET_ROUNDING(SDValue Op,
4043 SDValue GetRoundBothImm = DAG.getTargetConstant(BothRoundHwReg, SL, MVT::i32);
4045 SDValue IntrinID =
4047 SDValue GetReg = DAG.getNode(ISD::INTRINSIC_W_CHAIN, SL, Op->getVTList(),
4078 SDValue BitTable =
4081 SDValue Two = DAG.getConstant(2, SL, MVT::i32);
4082 SDValue RoundModeTimesNumBits =
4087 SDValue TableValue =
4089 SDValue TruncTable = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
4091 SDValue EntryMask = DAG.getConstant(0xf, SL, MVT::i32);
4092 SDValue TableEntry =
4097 SDValue Four = DAG.getConstant(4, SL, MVT::i32);
4098 SDValue IsStandardValue =
4100 SDValue EnumOffset = DAG.getNode(ISD::ADD, SL, MVT::i32, TableEntry, Four);
4101 SDValue Result = DAG.getNode(ISD::SELECT, SL, MVT::i32, IsStandardValue,
4107 SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
4111 SDValue NewMode = Op.getOperand(1);
4132 SDValue BitTable = DAG.getConstant(
4135 SDValue Two = DAG.getConstant(2, SL, MVT::i32);
4136 SDValue RoundModeTimesNumBits =
4147 SDValue BitTable =
4150 SDValue Four = DAG.getConstant(4, SL, MVT::i32);
4151 SDValue OffsetEnum = DAG.getNode(ISD::SUB, SL, MVT::i32, NewMode, Four);
4152 SDValue IndexVal =
4155 SDValue Two = DAG.getConstant(2, SL, MVT::i32);
4156 SDValue RoundModeTimesNumBits =
4159 SDValue TableValue =
4161 SDValue TruncTable = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
4171 SDValue ReadFirstLaneID =
4179 SDValue IntrinID =
4183 SDValue RoundBothImm = DAG.getTargetConstant(BothRoundHwReg, SL, MVT::i32);
4185 SDValue SetReg =
4192 SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const {
4194 return SDValue();
4203 return SDValue();
4210 SDValue SITargetLowering::lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
4212 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
4219 SDValue BitCast =
4229 SDValue SITargetLowering::lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const {
4236 SDValue ModeHwRegImm = DAG.getTargetConstant(ModeHwReg, SL, MVT::i32);
4239 SDValue TrapHwRegImm = DAG.getTargetConstant(TrapHwReg, SL, MVT::i32);
4242 SDValue IntrinID =
4244 SDValue GetModeReg = DAG.getNode(ISD::INTRINSIC_W_CHAIN, SL, VTList,
4246 SDValue GetTrapReg = DAG.getNode(ISD::INTRINSIC_W_CHAIN, SL, VTList,
4248 SDValue TokenReg =
4252 SDValue CvtPtr =
4254 SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
4259 SDValue SITargetLowering::lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const {
4264 SDValue Input = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op.getOperand(1));
4265 SDValue NewModeReg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Input,
4267 SDValue NewTrapReg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Input,
4270 SDValue ReadFirstLaneID =
4279 SDValue ModeHwRegImm = DAG.getTargetConstant(ModeHwReg, SL, MVT::i32);
4282 SDValue TrapHwRegImm = DAG.getTargetConstant(TrapHwReg, SL, MVT::i32);
4284 SDValue IntrinID =
4286 SDValue SetModeReg =
4289 SDValue SetTrapReg =
5672 SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
5681 SDValue Lo, Hi;
5685 SDValue OpLo = DAG.getNode(Opc, SL, Lo.getValueType(), Lo,
5687 SDValue OpHi = DAG.getNode(Opc, SL, Hi.getValueType(), Hi,
5695 SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
5704 SDValue Lo0, Hi0;
5706 SDValue Lo1, Hi1;
5711 SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1,
5713 SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1,
5719 SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
5730 SDValue Lo0, Hi0;
5731 SDValue Op0 = Op.getOperand(0);
5735 SDValue Lo1, Hi1;
5737 SDValue Lo2, Hi2;
5743 SDValue OpLo = DAG.getNode(Opc, SL, ResVT.first, Lo0, Lo1, Lo2,
5745 SDValue OpHi = DAG.getNode(Opc, SL, ResVT.second, Hi0, Hi1, Hi2,
5752 SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
5758 SDValue Result = LowerLOAD(Op, DAG);
5770 return SDValue();
5809 return SDValue();
5818 return SDValue();
5889 return SDValue();
5894 static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
5916 SmallVector<SDValue, 4> Elts;
5918 for (SDValue &Elt : Elts)
5935 SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
5938 ArrayRef<SDValue> Ops,
5961 SDValue Load
5967 SDValue Adjusted = adjustLoadValueTypeImpl(Load, LoadVT, DL, DAG, Unpacked);
5972 SDValue SITargetLowering::lowerIntrinsicLoad(MemSDNode *M, bool IsFormat,
5974 ArrayRef<SDValue> Ops) const {
6006 SDValue MemNode = getMemIntrinsicNode(Opc, DL, VTList, Ops, CastVT,
6013 static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
6022 SDValue LHS = N->getOperand(1);
6023 SDValue RHS = N->getOperand(2);
6040 SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS,
6047 static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
6055 SDValue Src0 = N->getOperand(1);
6056 SDValue Src1 = N->getOperand(2);
6069 SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0,
6076 static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
6079 SDValue Src = N->getOperand(1);
6100 return SDValue();
6113 static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
6123 auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1,
6124 SDValue Src2, MVT ValT) -> SDValue {
6125 SmallVector<SDValue, 8> Operands;
6154 SDValue(GL, 0)));
6160 SDValue Src0 = N->getOperand(1);
6161 SDValue Src1, Src2;
6171 return SDValue();
6189 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
6190 SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT);
6195 return SDValue();
6197 auto unrollLaneOp = [&DAG, &SL](SDNode *N) -> SDValue {
6201 SmallVector<SDValue, 8> Scalars;
6203 SmallVector<SDValue, 4> Operands(NumOperands);
6212 SDValue Operand = N->getOperand(j);
6228 SDValue(GL->getOperand(0).getNode(), 0));
6242 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VT.getSimpleVT());
6249 SmallVector<SDValue, 4> Pieces;
6250 SDValue Src0SubVec, Src1SubVec, Src2SubVec;
6286 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT);
6287 SDValue UnrolledLaneOp = unrollLaneOp(LaneOp.getNode());
6292 SmallVectorImpl<SDValue> &Results,
6296 if (SDValue Res = lowerINSERT_VECTOR_ELT(SDValue(N, 0), DAG))
6301 if (SDValue Res = lowerEXTRACT_VECTOR_ELT(SDValue(N, 0), DAG))
6312 SDValue Src0 = N->getOperand(1);
6313 SDValue Src1 = N->getOperand(2);
6315 SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32,
6324 SDValue Src0 = N->getOperand(1);
6325 SDValue Src1 = N->getOperand(2);
6342 SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
6355 SDValue Op = SDValue(N, 0);
6356 SDValue Rsrc = Op.getOperand(1);
6357 SDValue Offset = Op.getOperand(2);
6358 SDValue CachePolicy = Op.getOperand(3);
6371 SDValue LoadVal;
6373 SDValue Ops[] = {Rsrc, // source register
6375 SDValue BufferLoad =
6380 SDValue Ops[] = {
6400 if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) {
6419 SDValue LHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(1));
6420 SDValue RHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(2));
6429 SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, SelectVT,
6442 SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::i32, N->getOperand(0));
6444 SDValue Op = DAG.getNode(ISD::XOR, SL, MVT::i32,
6455 SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::i32, N->getOperand(0));
6457 SDValue Op = DAG.getNode(ISD::AND, SL, MVT::i32,
6466 Results.push_back(lowerFSQRTF16(SDValue(N, 0), DAG));
6476 static SDNode *findUser(SDValue Value, unsigned Opcode) {
6544 SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
6549 SDValue Target = BRCOND.getOperand(2);
6580 SmallVector<SDValue, 4> Ops;
6593 SDValue Ops[] = {
6594 SDValue(Result, 0),
6603 SDValue Ops[] = {
6607 SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
6611 SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
6615 SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
6622 SDValue(Result, i - 1),
6623 SDValue());
6625 DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
6630 SDValue(Intr, Intr->getNumValues() - 1),
6636 SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
6661 SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
6662 SDValue Op,
6671 SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
6675 SDValue Src = Op.getOperand(0);
6686 SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
6687 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
6691 SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
6711 SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
6716 SDValue Exp = Op.getOperand(IsStrict ? 2 : 1);
6729 SDValue MinExp = DAG.getConstant(minIntN(16), DL, ExpVT);
6730 SDValue ClampMin = DAG.getNode(ISD::SMAX, DL, ExpVT, Exp, MinExp);
6732 SDValue MaxExp = DAG.getConstant(maxIntN(16), DL, ExpVT);
6733 SDValue Clamp = DAG.getNode(ISD::SMIN, DL, ExpVT, ClampMin, MaxExp);
6735 SDValue TruncExp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Clamp);
6746 SDValue SITargetLowering::lowerMUL(SDValue Op, SelectionDAG &DAG) const {
6781 return SDValue();
6783 SDValue Op0 = Op.getOperand(0);
6784 SDValue Op1 = Op.getOperand(1);
6794 return SDValue(
6799 return SDValue(
6805 SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
6808 SDValue LHS = Op.getOperand(0);
6809 SDValue RHS = Op.getOperand(1);
6818 SDValue ShiftAmt = DAG.getConstant(C.logBase2(), SL, MVT::i32);
6819 SDValue Result = DAG.getNode(ISD::SHL, SL, VT, LHS, ShiftAmt);
6820 SDValue Overflow = DAG.getSetCC(SL, MVT::i1,
6828 SDValue Result = DAG.getNode(ISD::MUL, SL, VT, LHS, RHS);
6829 SDValue Top = DAG.getNode(isSigned ? ISD::MULHS : ISD::MULHU,
6832 SDValue Sign = isSigned
6836 SDValue Overflow = DAG.getSetCC(SL, MVT::i1, Top, Sign, ISD::SETNE);
6841 SDValue SITargetLowering::lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const {
6848 return SDValue();
6856 SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
6865 SDValue SITargetLowering::lowerTrapEndpgm(
6866 SDValue Op, SelectionDAG &DAG) const {
6868 SDValue Chain = Op.getOperand(0);
6872 SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
6876 SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), Offset);
6883 SDValue SITargetLowering::lowerTrapHsaQueuePtr(
6884 SDValue Op, SelectionDAG &DAG) const {
6886 SDValue Chain = Op.getOperand(0);
6888 SDValue QueuePtr;
6910 SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
6911 SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
6912 QueuePtr, SDValue());
6915 SDValue Ops[] = {
6924 SDValue SITargetLowering::lowerTrapHsa(
6925 SDValue Op, SelectionDAG &DAG) const {
6927 SDValue Chain = Op.getOperand(0);
6935 SDValue Ops[] = {
6942 SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
6944 SDValue Chain = Op.getOperand(0);
6959 SDValue Ops[] = {
6966 SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
6994 {SDValue(Mov, 0), DAG.getConstant(32, DL, MVT::i64)}));
7015 SDValue QueuePtr = CreateLiveInRegister(
7022 SDValue Ptr =
7037 static bool isKnownNonNull(SDValue Val, SelectionDAG &DAG,
7051 SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
7059 SDValue Src;
7075 SDValue FlatNullPtr = DAG.getConstant(0, SL, MVT::i64);
7081 SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
7087 SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
7088 SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
7100 SDValue Aperture = getSegmentAperture(SrcAS, SL, DAG);
7101 SDValue CvtPtr =
7109 SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
7111 SDValue NonNull
7123 SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32);
7124 SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi);
7147 SDValue SITargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
7149 SDValue Vec = Op.getOperand(0);
7150 SDValue Ins = Op.getOperand(1);
7151 SDValue Idx = Op.getOperand(2);
7174 SDValue Elt;
7189 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Ins,
7197 SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
7199 SDValue Vec = Op.getOperand(0);
7200 SDValue InsVal = Op.getOperand(1);
7201 SDValue Idx = Op.getOperand(2);
7212 SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
7214 SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
7216 SDValue HiHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
7219 SDValue LoVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, LoHalf);
7220 SDValue HiVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, HiHalf);
7224 SDValue InsHalf = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, MVT::v2i16,
7231 SDValue Concat = InsertLo ?
7241 return SDValue();
7253 SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
7254 SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
7255 SDValue BFM = DAG.getNode(ISD::SHL, SL, IntVT,
7259 SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
7263 SDValue LHS = DAG.getNode(ISD::AND, SL, IntVT, BFM, ExtVal);
7266 SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
7267 SDValue RHS = DAG.getNode(ISD::AND, SL, IntVT,
7271 SDValue BFI = DAG.getNode(ISD::OR, SL, IntVT, LHS, RHS);
7276 SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
7281 SDValue Vec = Op.getOperand(0);
7282 SDValue Idx = Op.getOperand(1);
7293 if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
7297 SDValue Lo, Hi;
7302 SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
7310 SDValue V2 = DAG.getBitcast(MVT::v4i64, Vec);
7311 SDValue Parts[4];
7324 SDValue V2 = DAG.getBitcast(MVT::v8i64, Vec);
7325 SDValue Parts[8];
7342 SDValue IdxMask = DAG.getConstant(NElem / 2 - 1, SL, IdxVT);
7343 SDValue NewIdx = DAG.getNode(ISD::AND, SL, IdxVT, Idx, IdxMask);
7344 SDValue Half = DAG.getSelectCC(SL, Idx, IdxMask, Hi, Lo, ISD::SETUGT);
7353 SDValue VecBC = peekThroughBitcasts(Vec);
7355 SDValue Src = VecBC.getOperand(0);
7363 SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
7366 SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
7368 SDValue BC = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
7369 SDValue Elt = DAG.getNode(ISD::SRL, SL, IntVT, BC, ScaledIdx);
7372 SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i16, Elt);
7384 SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
7404 SmallVector<SDValue, 4> Pieces;
7410 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL,
7422 SDValue Vec0 = SVN->getOperand(VecIdx0);
7423 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
7426 SDValue Vec1 = SVN->getOperand(VecIdx1);
7427 SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
7436 SDValue SITargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
7438 SDValue SVal = Op.getOperand(0);
7441 SDValue UndefVal = DAG.getUNDEF(SValVT);
7444 SmallVector<SDValue, 8> VElts;
7452 SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
7465 SmallVector<SDValue, 4> LoOps, HiOps;
7470 SDValue Lo = DAG.getBuildVector(HalfVT, SL, LoOps);
7471 SDValue Hi = DAG.getBuildVector(HalfVT, SL, HiOps);
7473 SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Lo);
7474 SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Hi);
7476 SDValue Blend = DAG.getBuildVector(MVT::getVectorVT(HalfIntVT, 2), SL,
7486 SmallVector<SDValue, 4> Parts[4];
7491 SDValue Casts[4];
7493 SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
7497 SDValue Blend =
7507 SmallVector<SDValue, 8> Parts[8];
7512 SDValue Casts[8];
7514 SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
7518 SDValue Blend =
7526 SDValue Lo = Op.getOperand(0);
7527 SDValue Hi = Op.getOperand(1);
7532 SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
7539 SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
7547 SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
7571 static SDValue
7599 SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset, GAFlags);
7600 SDValue PtrHi;
7608 SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
7609 SDValue Op,
7634 return SDValue(
7642 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
7648 SDValue AddrLo = DAG.getTargetGlobalAddress(
7652 SDValue AddrHi = DAG.getTargetGlobalAddress(
7666 SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
7681 SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
7682 const SDLoc &DL, SDValue V) const {
7691 // A Null SDValue creates a glue result.
7694 return SDValue(M0, 0);
7697 SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
7698 SDValue Op,
7702 SDValue Param = lowerKernargMemParameter(
7709 static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
7718 static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
7727 static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
7728 ArrayRef<SDValue> Elts) {
7741 SmallVector<SDValue, 16> VecElts(NumElts);
7743 SDValue Elt = Elts[i];
7756 static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT,
7757 SDValue Src, int ExtraElts) {
7760 SmallVector<SDValue, 8> Elts;
7767 SDValue Undef = DAG.getUNDEF(SrcVT.getScalarType());
7777 static SDValue constructRetValue(SelectionDAG &DAG, MachineSDNode *Result,
7797 SDValue Data(Result, 0);
7798 SDValue TexFail;
7801 SDValue ZeroIdx = DAG.getConstant(0, DL, MVT::i32);
7804 SDValue(Result, 0), ZeroIdx);
7807 SDValue(Result, 0), ZeroIdx);
7837 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SDValue(Result, 0),
7840 return DAG.getMergeValues({Data, TexFail, SDValue(Result, 1)}, DL);
7846 return DAG.getMergeValues({Data, SDValue(Result, 1)}, DL);
7849 static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
7850 SDValue *LWE, bool &IsTexFail) {
7867 static void packImage16bitOpsToDwords(SelectionDAG &DAG, SDValue Op,
7869 SmallVectorImpl<SDValue> &PackedAddrs,
7874 SDValue Addr = Op.getOperand(I);
7896 SDValue SITargetLowering::lowerImage(SDValue Op,
7915 SDValue VData;
7935 SDValue VData2 = Op.getOperand(3);
7995 SmallVector<SDValue, 4> VAddrs;
8015 SDValue Bias = DAG.getBuildVector(
8101 SDValue VAddr;
8110 SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
8111 SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
8112 SDValue Unorm;
8122 SDValue TFE;
8123 SDValue LWE;
8124 SDValue TexFail = Op.getOperand(ArgOffset + Intr->TexFailCtrlIndex);
8147 SDValue Undef = DAG.getUNDEF(Op.getValueType());
8173 SmallVector<SDValue, 26> Ops;
8254 SmallVector<SDValue, 1> Elt;
8255 DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
8256 return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
8259 return SDValue(NewNode, 0);
8265 SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
8266 SDValue Offset, SDValue CachePolicy,
8281 SDValue Ops[] = {Rsrc, Offset, CachePolicy};
8288 SDValue BufferLoad =
8313 SDValue Ops[] = {
8328 SmallVector<SDValue, 4> Loads;
8360 SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
8366 SDValue TTMP8 = DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP8, VT);
8371 SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
8380 SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
8396 SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8453 return SDValue();
8466 SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
8467 SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
8584 return SDValue();
8608 SDValue Numerator = Op.getOperand(1);
8609 SDValue Denominator = Op.getOperand(2);
8616 SDValue Src0 = Param->isAllOnes() ? Numerator : Denominator;
8675 SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
8684 return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
8695 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
8704 SDValue Aperture = getSegmentAperture(AS, SL, DAG);
8705 SDValue SrcVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32,
8708 SDValue SrcHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, SrcVec,
8721 SDValue GA = DAG.getTargetGlobalAddress(RelocSymbol, DL, MVT::i32, 0,
8734 return SDValue();
8746 return SDValue();
8775 static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG,
8782 SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op,
8787 SDValue VData = Op.getOperand(2);
8788 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
8791 SDValue Ops[] = {
8810 SDValue
8811 SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
8815 SDValue VData = Op.getOperand(2);
8816 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
8819 SDValue Ops[] = {
8838 SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
8847 SDValue Chain = M->getOperand(0);
8848 SDValue M0 = M->getOperand(2);
8849 SDValue Value = M->getOperand(3);
8888 SDValue Ops[] = {
8908 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
8911 SDValue Ops[] = {
8933 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
8936 SDValue Ops[] = {
8953 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
8957 SDValue Ops[] = {
8980 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
8984 SDValue Ops[] = {
9107 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(4), DAG);
9110 SDValue Ops[] = {
9130 SDValue Rsrc = bufferRsrcPtrToVector(Op->getOperand(4), DAG);
9133 SDValue Ops[] = {
9153 SDValue NodePtr = M->getOperand(2);
9154 SDValue RayExtent = M->getOperand(3);
9155 SDValue RayOrigin = M->getOperand(4);
9156 SDValue RayDir = M->getOperand(5);
9157 SDValue RayInvDir = M->getOperand(6);
9158 SDValue TDescr = M->getOperand(7);
9167 return SDValue();
9201 SmallVector<SDValue, 16> Ops;
9203 auto packLanes = [&DAG, &Ops, &DL] (SDValue Op, bool IsAligned) {
9204 SmallVector<SDValue, 3> Lanes;
9217 SDValue Elt0 = Ops.pop_back_val();
9235 SmallVector<SDValue, 3> DirLanes, InvDirLanes, MergedLanes;
9264 SDValue Undef = DAG.getUNDEF(MVT::i32);
9268 SDValue MergedOps = DAG.getBuildVector(
9281 return SDValue(NewNode, 0);
9292 SDValue Ops[] = {
9320 SDValue Chain = Op->getOperand(0);
9321 SmallVector<SDValue, 2> Ops;
9333 SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32);
9337 SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(2));
9342 return SDValue(NewMI, 0);
9350 return SDValue();
9356 SDValue SITargetLowering::getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL,
9358 ArrayRef<SDValue> Ops, EVT MemVT,
9374 SDValue Op = getMemIntrinsicNode(Opcode, DL, OpDWordsVTList, Ops,
9376 SDValue Status = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Op,
9378 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
9379 SDValue ValueDWords =
9385 SDValue Value = DAG.getNode(ISD::BITCAST, DL, VT, ValueDWords);
9386 return DAG.getMergeValues({Value, Status, SDValue(Op.getNode(), 1)}, DL);
9395 SDValue Op = DAG.getMemIntrinsicNode(Opcode, DL, WidenedVTList, Ops,
9397 SDValue Value = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Op,
9399 return DAG.getMergeValues({Value, SDValue(Op.getNode(), 1)}, DL);
9405 SDValue SITargetLowering::handleD16VData(SDValue VData, SelectionDAG &DAG,
9419 SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
9423 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData);
9433 SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
9436 SmallVector<SDValue, 4> Elts;
9440 SmallVector<SDValue, 4> PackedElts;
9442 SDValue Pair =
9444 SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
9450 SDValue Pair = DAG.getBuildVector(MVT::v2i16, DL,
9452 SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
9468 SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
9474 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenedIntVT, IntVData);
9482 SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9485 SDValue Chain = Op.getOperand(0);
9497 SDValue Src0 = Op.getOperand(4);
9498 SDValue Src1 = Op.getOperand(5);
9501 return SDValue();
9504 SDValue Undef = DAG.getUNDEF(MVT::f32);
9505 const SDValue Ops[] = {
9518 return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
9525 return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
9531 SDValue K =
9533 SDValue BarSignal =
9534 SDValue(DAG.getMachineNode(AMDGPU::S_BARRIER_SIGNAL_IMM, DL,
9537 SDValue BarWait =
9538 SDValue(DAG.getMachineNode(AMDGPU::S_BARRIER_WAIT, DL, MVT::Other, K,
9544 return SDValue();
9549 SDValue VData = Op.getOperand(2);
9553 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
9556 SDValue Ops[] = {
9577 SDValue VData = Op.getOperand(2);
9581 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
9584 SDValue Ops[] = {
9611 SDValue VData = Op.getOperand(2);
9626 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
9629 SDValue Ops[] = {
9661 SDValue VData = Op.getOperand(2);
9680 SDValue Ops[] = {
9714 SDValue VOffset = Op.getOperand(5 + OpOffset);
9720 return SDValue();
9741 SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
9743 SmallVector<SDValue, 8> Ops;
9754 SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
9791 return SDValue(Load, 0);
9798 return SDValue();
9811 SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
9813 SmallVector<SDValue, 6> Ops;
9815 SDValue Addr = Op.getOperand(2); // Global ptr
9816 SDValue VOffset;
9820 SDValue LHS = Addr.getOperand(0);
9821 SDValue RHS = Addr.getOperand(1);
9838 VOffset = SDValue(
9869 return SDValue(Load, 0);
9872 return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
9877 SDValue Chain = Op->getOperand(0);
9878 SmallVector<SDValue, 2> Ops;
9879 SDValue BarOp = Op->getOperand(2);
9892 return SDValue();
9904 SDValue K = DAG.getTargetConstant(BarVal, DL, MVT::i32);
9909 return SDValue();
9923 SDValue M0Val;
9932 M0Val = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32,
9942 return SDValue(NewMI, 0);
9960 std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
9961 SDValue Offset, SelectionDAG &DAG) const {
9964 SDValue N0 = Offset;
9968 N0 = SDValue();
9996 SDValue Ops[] = { N0, OverflowVal };
10005 return {N0, SDValue(C1, 0)};
10009 // the three offsets (voffset, soffset and instoffset) into the SDValue[3] array
10011 void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
10012 SelectionDAG &DAG, SDValue *Offsets,
10027 SDValue N0 = CombinedOffset.getOperand(0);
10028 SDValue N1 = CombinedOffset.getOperand(1);
10040 SDValue SOffsetZero = Subtarget->hasRestrictedSOffset()
10049 SDValue SITargetLowering::bufferRsrcPtrToVector(SDValue MaybePointer,
10056 SDValue Rsrc = DAG.getBitcast(MVT::v4i32, MaybePointer);
10062 SDValue SITargetLowering::lowerPointerAsRsrcIntrin(SDNode *Op,
10066 SDValue Pointer = Op->getOperand(1);
10067 SDValue Stride = Op->getOperand(2);
10068 SDValue NumRecords = Op->getOperand(3);
10069 SDValue Flags = Op->getOperand(4);
10072 SDValue Mask = DAG.getConstant(0x0000ffff, Loc, MVT::i32);
10073 SDValue Masked = DAG.getNode(ISD::AND, Loc, MVT::i32, HighHalf, Mask);
10078 SDValue NewHighHalf = Masked;
10080 SDValue ShiftedStride;
10084 SDValue ExtStride = DAG.getAnyExtOrTrunc(Stride, Loc, MVT::i32);
10092 SDValue Rsrc = DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v4i32, LowHalf,
10094 SDValue RsrcPtr = DAG.getNode(ISD::BITCAST, Loc, MVT::i128, Rsrc);
10099 SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
10101 ArrayRef<SDValue> Ops,
10113 SDValue Op = getMemIntrinsicNode(Opc, DL, VTs, Ops, MVT::v2i32, OpMMO, DAG);
10114 SDValue Status = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Op,
10116 SDValue Data = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Op,
10118 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Data);
10119 SDValue Value = DAG.getNode(ISD::BITCAST, DL, LoadVT, Trunc);
10120 return DAG.getMergeValues({Value, Status, SDValue(Op.getNode(), 1)}, DL);
10127 SDValue BufferLoad =
10129 SDValue LoadVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, BufferLoad);
10136 SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
10138 SDValue Ops[],
10143 SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);
10147 ArrayRef<SDValue> OpsRef = ArrayRef(&Ops[0], 9);
10152 static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
10153 ISD::LoadExtType ExtType, SDValue Op,
10174 SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const {
10177 return SDValue();
10184 return SDValue();
10192 return SDValue();
10200 SDValue Ptr = Ld->getBasePtr();
10201 SDValue NewLoad = DAG.getLoad(
10214 SDValue Cvt = NewLoad;
10249 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
10257 return SDValue();
10262 SDValue Chain = Load->getChain();
10263 SDValue BasePtr = Load->getBasePtr();
10268 SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
10272 SDValue Ops[] = {
10280 SmallVector<SDValue, 3> Elts;
10282 SDValue Elt = DAG.getNode(ISD::SRL, DL, MVT::i32, NewLD,
10288 SDValue Ops[] = {
10297 return SDValue();
10325 return SDValue();
10342 return SDValue();
10361 return SDValue();
10369 SDValue Ops[2];
10376 return SDValue();
10385 return SDValue();
10395 return SDValue();
10403 SDValue Ops[2];
10408 return SDValue();
10411 SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
10420 SDValue Cond = Op.getOperand(0);
10422 SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
10423 SDValue One = DAG.getConstant(1, DL, MVT::i32);
10425 SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
10426 SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
10428 SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
10429 SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
10431 SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
10433 SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
10434 SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
10436 SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
10438 SDValue Res = DAG.getBuildVector(MVT::v2i32, DL, {Lo, Hi});
10444 SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
10447 SDValue LHS = Op.getOperand(0);
10448 SDValue RHS = Op.getOperand(1);
10460 return SDValue();
10481 SDValue FNegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
10489 return SDValue();
10493 SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
10497 SDValue SITargetLowering::lowerFastUnsafeFDIV64(SDValue Op,
10500 SDValue X = Op.getOperand(0);
10501 SDValue Y = Op.getOperand(1);
10508 return SDValue();
10510 SDValue NegY = DAG.getNode(ISD::FNEG, SL, VT, Y);
10511 SDValue One = DAG.getConstantFP(1.0, SL, VT);
10513 SDValue R = DAG.getNode(AMDGPUISD::RCP, SL, VT, Y);
10514 SDValue Tmp0 = DAG.getNode(ISD::FMA, SL, VT, NegY, R, One);
10517 SDValue Tmp1 = DAG.getNode(ISD::FMA, SL, VT, NegY, R, One);
10519 SDValue Ret = DAG.getNode(ISD::FMUL, SL, VT, X, R);
10520 SDValue Tmp2 = DAG.getNode(ISD::FMA, SL, VT, NegY, Ret, X);
10524 static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
10525 EVT VT, SDValue A, SDValue B, SDValue GlueChain,
10546 static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
10547 EVT VT, SDValue A, SDValue B, SDValue C,
10548 SDValue GlueChain, SDNodeFlags Flags) {
10568 SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
10569 if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
10573 SDValue Src0 = Op.getOperand(0);
10574 SDValue Src1 = Op.getOperand(1);
10576 SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
10577 SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
10579 SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1);
10580 SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1);
10582 SDValue FPRoundFlag = DAG.getTargetConstant(0, SL, MVT::i32);
10583 SDValue BestQuot = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, FPRoundFlag);
10589 SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
10592 SDValue LHS = Op.getOperand(1);
10593 SDValue RHS = Op.getOperand(2);
10595 SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS, Flags);
10598 const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
10601 const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
10603 const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
10608 SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
10610 SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One, Flags);
10615 SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1, Flags);
10617 SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0, Flags);
10624 static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG,
10633 SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
10634 if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
10645 SDValue LHS = Op.getOperand(0);
10646 SDValue RHS = Op.getOperand(1);
10648 const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
10652 SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
10654 SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
10658 SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32,
10660 SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
10665 const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
10676 SDValue SavedDenormMode;
10685 SDValue Glue = DAG.getEntryNode();
10690 SavedDenormMode = SDValue(GetReg, 0);
10693 {DAG.getEntryNode(), SDValue(GetReg, 0), SDValue(GetReg, 1)}, SL);
10698 const SDValue EnableDenormValue =
10705 const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
10711 SDValue Ops[3] = {
10713 SDValue(EnableDenorm, 0),
10714 SDValue(EnableDenorm, 1)
10720 SDValue Fma0 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0,
10723 SDValue Fma1 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp,
10726 SDValue Mul = getFPBinOp(DAG, ISD::FMUL, SL, MVT::f32, NumeratorScaled,
10729 SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
10732 SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32,
10735 SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
10741 const SDValue DisableDenormValue = getSPDenormModeValue(
10749 const SDValue DisableDenormValue =
10759 SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
10760 SDValue(DisableDenorm, 0), DAG.getRoot());
10764 SDValue Scale = NumeratorScaled.getValue(1);
10765 SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32,
10771 SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
10772 if (SDValue FastLowered = lowerFastUnsafeFDIV64(Op, DAG))
10776 SDValue X = Op.getOperand(0);
10777 SDValue Y = Op.getOperand(1);
10779 const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
10783 SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
10785 SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
10787 SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
10789 SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
10791 SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
10793 SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
10795 SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
10797 SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
10798 SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
10800 SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
10803 SDValue Scale;
10809 const SDValue Hi = DAG.getConstant(1, SL, MVT::i32);
10812 SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
10813 SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
10814 SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
10815 SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
10817 SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
10818 SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
10820 SDValue Scale0Hi
10822 SDValue Scale1Hi
10825 SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
10826 SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
10832 SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
10838 SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
10853 SDValue SITargetLowering::LowerFFREXP(SDValue Op, SelectionDAG &DAG) const {
10855 SDValue Val = Op.getOperand(0);
10860 SDValue Mant = DAG.getNode(
10864 SDValue Exp = DAG.getNode(
10869 SDValue Fabs = DAG.getNode(ISD::FABS, dl, VT, Val);
10870 SDValue Inf = DAG.getConstantFP(
10873 SDValue IsFinite = DAG.getSetCC(dl, MVT::i1, Fabs, Inf, ISD::SETOLT);
10874 SDValue Zero = DAG.getConstant(0, dl, InstrExpVT);
10879 SDValue CastExp = DAG.getSExtOrTrunc(Exp, dl, ResultExpVT);
10883 SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
10926 return SDValue();
10935 return SDValue();
10940 return SDValue();
10950 return SDValue();
10959 return SDValue();
10963 SDValue SITargetLowering::lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const {
10967 SDValue Ext =
10970 SDValue SqrtID = DAG.getTargetConstant(Intrinsic::amdgcn_sqrt, SL, MVT::i32);
10971 SDValue Sqrt =
10978 SDValue SITargetLowering::lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const {
10982 const SDValue X = Op.getOperand(0);
10991 SDValue ScaleThreshold = DAG.getConstantFP(0x1.0p-96f, DL, VT);
10992 SDValue NeedScale = DAG.getSetCC(DL, MVT::i1, X, ScaleThreshold, ISD::SETOLT);
10994 SDValue ScaleUpFactor = DAG.getConstantFP(0x1.0p+32f, DL, VT);
10996 SDValue ScaledX = DAG.getNode(ISD::FMUL, DL, VT, X, ScaleUpFactor, Flags);
10998 SDValue SqrtX =
11001 SDValue SqrtS;
11003 SDValue SqrtID =
11007 SDValue SqrtSAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, SqrtS);
11008 SDValue SqrtSNextDownInt = DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt,
11010 SDValue SqrtSNextDown = DAG.getNode(ISD::BITCAST, DL, VT, SqrtSNextDownInt);
11012 SDValue NegSqrtSNextDown =
11015 SDValue SqrtVP =
11018 SDValue SqrtSNextUpInt = DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt,
11020 SDValue SqrtSNextUp = DAG.getNode(ISD::BITCAST, DL, VT, SqrtSNextUpInt);
11022 SDValue NegSqrtSNextUp = DAG.getNode(ISD::FNEG, DL, VT, SqrtSNextUp, Flags);
11023 SDValue SqrtVS =
11026 SDValue Zero = DAG.getConstantFP(0.0f, DL, VT);
11027 SDValue SqrtVPLE0 = DAG.getSetCC(DL, MVT::i1, SqrtVP, Zero, ISD::SETOLE);
11032 SDValue SqrtVPVSGT0 = DAG.getSetCC(DL, MVT::i1, SqrtVS, Zero, ISD::SETOGT);
11036 SDValue SqrtR = DAG.getNode(AMDGPUISD::RSQ, DL, VT, SqrtX, Flags);
11040 SDValue Half = DAG.getConstantFP(0.5f, DL, VT);
11041 SDValue SqrtH = DAG.getNode(ISD::FMUL, DL, VT, SqrtR, Half, Flags);
11042 SDValue NegSqrtH = DAG.getNode(ISD::FNEG, DL, VT, SqrtH, Flags);
11044 SDValue SqrtE = DAG.getNode(ISD::FMA, DL, VT, NegSqrtH, SqrtS, Half, Flags);
11048 SDValue NegSqrtS = DAG.getNode(ISD::FNEG, DL, VT, SqrtS, Flags);
11049 SDValue SqrtD =
11054 SDValue ScaleDownFactor = DAG.getConstantFP(0x1.0p-16f, DL, VT);
11056 SDValue ScaledDown =
11060 SDValue IsZeroOrInf =
11067 SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
11092 SDValue X = Op.getOperand(0);
11093 SDValue ScaleConstant = DAG.getConstantFP(0x1.0p-767, DL, MVT::f64);
11095 SDValue Scaling = DAG.getSetCC(DL, MVT::i1, X, ScaleConstant, ISD::SETOLT);
11097 SDValue ZeroInt = DAG.getConstant(0, DL, MVT::i32);
11100 SDValue ScaleUpFactor = DAG.getConstant(256, DL, MVT::i32);
11101 SDValue ScaleUp =
11103 SDValue SqrtX = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, X, ScaleUp, Flags);
11105 SDValue SqrtY = DAG.getNode(AMDGPUISD::RSQ, DL, MVT::f64, SqrtX);
11107 SDValue SqrtS0 = DAG.getNode(ISD::FMUL, DL, MVT::f64, SqrtX, SqrtY);
11109 SDValue Half = DAG.getConstantFP(0.5, DL, MVT::f64);
11110 SDValue SqrtH0 = DAG.getNode(ISD::FMUL, DL, MVT::f64, SqrtY, Half);
11112 SDValue NegSqrtH0 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtH0);
11113 SDValue SqrtR0 = DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtH0, SqrtS0, Half);
11115 SDValue SqrtH1 = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtH0, SqrtR0, SqrtH0);
11117 SDValue SqrtS1 = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtS0, SqrtR0, SqrtS0);
11119 SDValue NegSqrtS1 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtS1);
11120 SDValue SqrtD0 = DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtS1, SqrtS1, SqrtX);
11122 SDValue SqrtS2 = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD0, SqrtH1, SqrtS1);
11124 SDValue NegSqrtS2 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtS2);
11125 SDValue SqrtD1 =
11128 SDValue SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2);
11130 SDValue ScaleDownFactor = DAG.getConstant(-128, DL, MVT::i32);
11131 SDValue ScaleDown =
11139 SDValue IsZeroOrInf =
11148 SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
11151 SDValue Arg = Op.getOperand(0);
11152 SDValue TrigVal;
11158 SDValue OneOver2Pi = DAG.getConstantFP(0.5 * numbers::inv_pi, DL, VT);
11161 SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
11177 SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
11189 SDValue ChainIn = Op.getOperand(0);
11190 SDValue Addr = Op.getOperand(1);
11191 SDValue Old = Op.getOperand(2);
11192 SDValue New = Op.getOperand(3);
11197 SDValue NewOld = DAG.getBuildVector(VecType, DL, {New, Old});
11198 SDValue Ops[] = { ChainIn, Addr, NewOld };
11208 SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
11213 return SDValue();
11218 SDValue Src = N->getOperand(0);
11227 SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, MVT::f32, Src);
11239 return SDValue();
11242 SDValue SITargetLowering::performFCopySignCombine(SDNode *N,
11244 SDValue MagnitudeOp = N->getOperand(0);
11245 SDValue SignOp = N->getOperand(1);
11253 SDValue MagAsVector = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, MagnitudeOp);
11254 SDValue MagLo =
11257 SDValue MagHi =
11261 SDValue HiOp =
11264 SDValue Vector = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32, MagLo, HiOp);
11270 return SDValue();
11277 SDValue SignAsVector = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, SignOp);
11278 SDValue SignAsF32 =
11302 SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
11306 SDValue N0 = N->getOperand(0);
11307 SDValue N1 = N->getOperand(1);
11313 return SDValue();
11317 return SDValue();
11321 return SDValue();
11327 return SDValue();
11338 return SDValue();
11343 SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
11344 SDValue COffset = DAG.getConstant(Offset, SL, VT);
11368 SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
11374 SDValue Ptr = N->getOperand(PtrIdx);
11378 SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), N->getAddressSpace(),
11381 SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
11384 return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
11388 return SDValue();
11402 SDValue SITargetLowering::splitBinaryBitConstantOp(
11405 unsigned Opc, SDValue LHS,
11421 return SDValue();
11424 bool llvm::isBoolSGPR(SDValue V) {
11463 static uint32_t getPermuteMask(SDValue V) {
11504 SDValue SITargetLowering::performAndCombine(SDNode *N,
11507 return SDValue();
11511 SDValue LHS = N->getOperand(0);
11512 SDValue RHS = N->getOperand(1);
11517 if (SDValue Split
11537 SDValue BFE = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
11542 SDValue Ext = DAG.getNode(ISD::AssertZext, SL, VT, BFE,
11544 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(LHS), VT, Ext,
11556 return SDValue();
11572 SDValue X = LHS.getOperand(0);
11573 SDValue Y = RHS.getOperand(0);
11576 return SDValue();
11580 return SDValue();
11585 return SDValue();
11688 return SDValue();
11731 static const std::optional<ByteProvider<SDValue>>
11732 calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex = 0,
11742 return ByteProvider<SDValue>::getSrc(Op, DestByte, SrcIndex);
11752 SDValue NarrowOp = Op->getOperand(0);
11784 return ByteProvider<SDValue>::getSrc(Op, DestByte, SrcIndex);
11796 static const std::optional<ByteProvider<SDValue>>
11797 calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
11852 return ByteProvider<SDValue>::getConstantZero();
11880 SDValue NextOp = Op.getOperand(NewIndex >= BytesProvided ? 0 : 1);
11911 : ByteProvider<SDValue>::getConstantZero();
11932 ? ByteProvider<SDValue>::getConstantZero()
11945 SDValue NarrowOp = Op->getOperand(0);
11959 ? std::optional<ByteProvider<SDValue>>(
11960 ByteProvider<SDValue>::getConstantZero())
11999 ? std::optional<ByteProvider<SDValue>>(
12000 ByteProvider<SDValue>::getConstantZero())
12048 : ByteProvider<SDValue>(
12049 ByteProvider<SDValue>::getConstantZero());
12061 static bool isExtendedFrom16Bits(SDValue &Operand) {
12106 static bool hasNon16BitAccesses(uint64_t PermMask, SDValue &Op,
12107 SDValue &OtherOp) {
12128 static SDValue getDWordFromOffset(SelectionDAG &DAG, SDLoc SL, SDValue Src,
12130 SDValue Ret;
12166 SmallVector<SDValue, 4> VecSrcs;
12183 static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
12186 SmallVector<ByteProvider<SDValue>, 8> PermNodes;
12192 std::optional<ByteProvider<SDValue>> P =
12193 calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
12196 return SDValue();
12201 return SDValue();
12219 return SDValue();
12231 SDValue Op = *PermNodes[FirstSrc.first].Src;
12248 SDValue OtherOp = SecondSrc ? *PermNodes[SecondSrc->first].Src : Op;
12271 return SDValue();
12274 SDValue SITargetLowering::performOrCombine(SDNode *N,
12277 SDValue LHS = N->getOperand(0);
12278 SDValue RHS = N->getOperand(1);
12285 SDValue Src = LHS.getOperand(0);
12287 return SDValue();
12292 return SDValue();
12303 return SDValue();
12312 return SDValue();
12349 return SDValue();
12387 if (SDValue Perm = matchPERM(N, DCI))
12393 return SDValue();
12405 SDValue ExtSrc = RHS.getOperand(0);
12409 SDValue LowLHS, HiBits;
12411 SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
12416 SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
12424 if (SDValue Split
12430 return SDValue();
12433 SDValue SITargetLowering::performXorCombine(SDNode *N,
12435 if (SDValue RV = reassociateScalarOps(N, DCI.DAG))
12438 SDValue LHS = N->getOperand(0);
12439 SDValue RHS = N->getOperand(1);
12446 if (SDValue Split
12460 SDValue CastLHS =
12462 SDValue CastRHS =
12464 SDValue FNegLHS = DAG.getNode(ISD::FNEG, DL, MVT::f32, CastLHS);
12465 SDValue FNegRHS = DAG.getNode(ISD::FNEG, DL, MVT::f32, CastRHS);
12466 SDValue NewSelect = DAG.getNode(ISD::SELECT, DL, MVT::f32,
12472 return SDValue();
12475 SDValue SITargetLowering::performZeroExtendCombine(SDNode *N,
12479 return SDValue();
12483 return SDValue();
12485 SDValue Src = N->getOperand(0);
12487 return SDValue();
12489 return SDValue();
12492 SDValue
12495 SDValue Src = N->getOperand(0);
12513 SDValue Ops[] = {
12519 SDValue BufferLoad = DCI.DAG.getMemIntrinsicNode(
12521 SDValue LoadVal = DCI.DAG.getNode(ISD::TRUNCATE, DL, VT, BufferLoad);
12530 SDValue Ops[] = {
12545 SDValue BufferLoadSignExt = DCI.DAG.getMemIntrinsicNode(Opc, SDLoc(N),
12552 return SDValue();
12555 SDValue SITargetLowering::performClassCombine(SDNode *N,
12558 SDValue Mask = N->getOperand(1);
12567 return SDValue();
12570 SDValue SITargetLowering::performRcpCombine(SDNode *N,
12573 SDValue N0 = N->getOperand(0);
12597 bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
12728 SDValue SrcOp = Op.getOperand(i);
12755 SDValue TruncSrc = Op.getOperand(0);
12914 SDValue SITargetLowering::getCanonicalConstantFP(
12926 return SDValue();
12949 static bool vectorEltWillFoldAway(SDValue Op) {
12953 SDValue SITargetLowering::performFCanonicalizeCombine(
12957 SDValue N0 = N->getOperand(0);
12981 SDValue NewElts[2];
12982 SDValue Lo = N0.getOperand(0);
12983 SDValue Hi = N0.getOperand(1);
12988 SDValue Op = N0.getOperand(I);
13018 return SDValue();
13046 SDValue SITargetLowering::performIntMed3ImmCombine(SelectionDAG &DAG,
13047 const SDLoc &SL, SDValue Src,
13048 SDValue MinVal,
13049 SDValue MaxVal,
13062 return SDValue();
13066 return SDValue();
13069 return SDValue();
13081 return SDValue();
13084 static ConstantFPSDNode *getSplatConstantFP(SDValue Op) {
13096 SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
13098 SDValue Op0,
13099 SDValue Op1) const {
13102 return SDValue();
13106 return SDValue();
13110 return SDValue();
13131 SDValue Var = Op0.getOperand(0);
13133 return SDValue();
13140 Var, SDValue(K0, 0), SDValue(K1, 0));
13144 return SDValue();
13174 SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
13180 SDValue Op0 = N->getOperand(0);
13181 SDValue Op1 = N->getOperand(1);
13216 if (SDValue Med3 = performIntMed3ImmCombine(
13221 if (SDValue Med3 = performIntMed3ImmCombine(
13227 if (SDValue Med3 = performIntMed3ImmCombine(
13232 if (SDValue Med3 = performIntMed3ImmCombine(
13246 if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
13250 return SDValue();
13253 static bool isClampZeroToOne(SDValue A, SDValue B) {
13266 SDValue SITargetLowering::performFMed3Combine(SDNode *N,
13275 SDValue Src0 = N->getOperand(0);
13276 SDValue Src1 = N->getOperand(1);
13277 SDValue Src2 = N->getOperand(2);
13307 return SDValue();
13310 SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N,
13312 SDValue Src0 = N->getOperand(0);
13313 SDValue Src1 = N->getOperand(1);
13316 return SDValue();
13358 SDValue Idx = N->getOperand(N->getNumOperands() - 1);
13362 SDValue Vec = N->getOperand(0);
13372 SDValue SITargetLowering::performExtractVectorEltCombine(
13374 SDValue Vec = N->getOperand(0);
13387 SDValue Idx = N->getOperand(1);
13388 SDValue Elt =
13400 SDValue Idx = N->getOperand(1);
13421 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
13423 SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
13436 SDValue Idx = N->getOperand(1);
13437 SDValue V;
13439 SDValue IC = DAG.getVectorIdxConstant(I, SL);
13440 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
13450 return SDValue();
13465 SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
13468 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
13471 SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
13476 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
13487 return SDValue();
13490 SDValue
13493 SDValue Vec = N->getOperand(0);
13494 SDValue Idx = N->getOperand(2);
13501 return SDValue();
13505 SDValue Ins = N->getOperand(1);
13508 SmallVector<SDValue, 16> Ops;
13510 SDValue IC = DAG.getConstant(I, SL, IdxVT);
13511 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
13512 SDValue V = DAG.getSelectCC(SL, Idx, IC, Ins, Elt, ISD::SETEQ);
13521 static SDValue strictFPExtFromF16(SelectionDAG &DAG, SDValue Src) {
13535 return SDValue();
13538 SDValue SITargetLowering::performFPRoundCombine(SDNode *N,
13543 SDValue TruncSrc = N->getOperand(0);
13546 return SDValue();
13550 return SDValue();
13561 SDValue A = strictFPExtFromF16(DAG, TruncSrc.getOperand(0));
13563 return SDValue();
13565 SDValue B = strictFPExtFromF16(DAG, TruncSrc.getOperand(1));
13567 return SDValue();
13569 SDValue C = strictFPExtFromF16(DAG, TruncSrc.getOperand(2));
13571 return SDValue();
13577 SDValue A1 = DAG.getNode(ISD::FMINNUM_IEEE, SL, VT, A, B);
13578 SDValue B1 = DAG.getNode(ISD::FMAXNUM_IEEE, SL, VT, A, B);
13579 SDValue C1 = DAG.getNode(ISD::FMAXNUM_IEEE, SL, VT, A1, C);
13610 SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
13614 return SDValue();
13616 if (DAG.isBaseWithConstantOffset(SDValue(N, 0)))
13617 return SDValue();
13620 SDValue Op0 = N->getOperand(0);
13621 SDValue Op1 = N->getOperand(1);
13624 return SDValue();
13630 return SDValue();
13632 SDValue Op2 = Op1.getOperand(1);
13635 return SDValue();
13641 SDValue Add1 = DAG.getNode(Opc, SL, VT, Op0, Op1);
13645 static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
13647 SDValue N0, SDValue N1, SDValue N2,
13651 SDValue Mad = DAG.getNode(MadOpc, SL, VTs, N0, N1, N2);
13663 SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
13670 SDValue LHS = N->getOperand(0);
13671 SDValue RHS = N->getOperand(1);
13674 return SDValue();
13679 return SDValue();
13683 return SDValue();
13699 return SDValue();
13705 return SDValue();
13709 SDValue MulLHS = LHS.getOperand(0);
13710 SDValue MulRHS = LHS.getOperand(1);
13711 SDValue AddRHS = RHS;
13747 SDValue One = DAG.getConstant(1, SL, MVT::i32);
13751 SDValue Accum =
13755 SDValue AccumLo, AccumHi;
13761 SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSHi, MulRHSLo);
13768 SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSLo, MulRHSHi);
13783 static std::optional<ByteProvider<SDValue>>
13784 handleMulOperand(const SDValue &MulOperand) {
13811 SDValue SrcOp;
13816 static void placeSources(ByteProvider<SDValue> &Src0,
13817 ByteProvider<SDValue> &Src1,
13832 std::pair<ByteProvider<SDValue>, ByteProvider<SDValue>> BPP = {Src0, Src1};
13843 // Attempt to find Src vector which contains our SDValue, if so, add our
13845 // first SDValue, attempt to find match for the second.
13894 static SDValue resolveSources(SelectionDAG &DAG, SDLoc SL,
13914 SmallVector<SDValue, 2> Perms;
13970 static bool isMul(const SDValue Op) {
13978 checkDot4MulSignedness(const SDValue &N, ByteProvider<SDValue> &Src0,
13979 ByteProvider<SDValue> &Src1, const SDValue &S0Op,
13980 const SDValue &S1Op, const SelectionDAG &DAG) {
14040 SDValue SITargetLowering::performAddCombine(SDNode *N,
14045 SDValue LHS = N->getOperand(0);
14046 SDValue RHS = N->getOperand(1);
14050 if (SDValue Folded = tryFoldToMad64_32(N, DCI))
14055 if (SDValue V = reassociateScalarOps(N, DAG)) {
14061 SDValue TempNode(N, 0);
14065 SmallVector<SDValue, 4> Src2s;
14129 return SDValue();
14139 SDValue Src0, Src1;
14187 SDValue Src2 =
14190 SDValue IID = DAG.getTargetConstant(*IsSigned ? Intrinsic::amdgcn_sdot4
14202 return SDValue();
14223 SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
14231 SDValue Args[] = { LHS, RHS.getOperand(0), RHS.getOperand(2) };
14235 return SDValue();
14238 SDValue SITargetLowering::performSubCombine(SDNode *N,
14244 return SDValue();
14247 SDValue LHS = N->getOperand(0);
14248 SDValue RHS = N->getOperand(1);
14264 SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
14273 return SDValue();
14274 SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
14277 return SDValue();
14280 SDValue SITargetLowering::performAddCarrySubCarryCombine(SDNode *N,
14284 return SDValue();
14287 return SDValue();
14290 SDValue LHS = N->getOperand(0);
14298 SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1), N->getOperand(2) };
14301 return SDValue();
14304 SDValue SITargetLowering::performFAddCombine(SDNode *N,
14307 return SDValue();
14313 SDValue LHS = N->getOperand(0);
14314 SDValue RHS = N->getOperand(1);
14321 SDValue A = LHS.getOperand(0);
14325 const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
14333 SDValue A = RHS.getOperand(0);
14337 const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
14343 return SDValue();
14346 SDValue SITargetLowering::performFSubCombine(SDNode *N,
14349 return SDValue();
14361 SDValue LHS = N->getOperand(0);
14362 SDValue RHS = N->getOperand(1);
14365 SDValue A = LHS.getOperand(0);
14369 const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
14370 SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
14380 SDValue A = RHS.getOperand(0);
14384 const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT);
14390 return SDValue();
14393 SDValue SITargetLowering::performFDivCombine(SDNode *N,
14399 return SDValue();
14401 SDValue LHS = N->getOperand(0);
14402 SDValue RHS = N->getOperand(1);
14408 return SDValue();
14418 SDValue Rsq =
14425 return SDValue();
14428 SDValue SITargetLowering::performFMACombine(SDNode *N,
14435 return SDValue();
14439 SDValue Op1 = N->getOperand(0);
14440 SDValue Op2 = N->getOperand(1);
14441 SDValue FMA = N->getOperand(2);
14446 return SDValue();
14459 return SDValue();
14461 SDValue Vec1 = Op1.getOperand(0);
14462 SDValue Idx1 = Op1.getOperand(1);
14463 SDValue Vec2 = Op2.getOperand(0);
14465 SDValue FMAOp1 = FMA.getOperand(0);
14466 SDValue FMAOp2 = FMA.getOperand(1);
14467 SDValue FMAAcc = FMA.getOperand(2);
14471 return SDValue();
14477 return SDValue();
14479 SDValue Vec3 = FMAOp1.getOperand(0);
14480 SDValue Vec4 = FMAOp2.getOperand(0);
14481 SDValue Idx2 = FMAOp1.getOperand(1);
14486 return SDValue();
14489 return SDValue();
14492 return SDValue();
14500 return SDValue();
14503 SDValue SITargetLowering::performSetCCCombine(SDNode *N,
14508 SDValue LHS = N->getOperand(0);
14509 SDValue RHS = N->getOperand(1);
14569 return SDValue();
14578 return SDValue();
14596 return SDValue();
14599 SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
14605 SDValue Src = N->getOperand(0);
14606 SDValue Shift = N->getOperand(0);
14619 SDValue Shifted = DAG.getZExtOrTrunc(Shift.getOperand(0),
14642 return SDValue(N, 0);
14646 if (SDValue DemandedSrc =
14650 return SDValue();
14653 SDValue SITargetLowering::performClampCombine(SDNode *N,
14657 return SDValue();
14671 return SDValue(CSrc, 0);
14675 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
14678 return SDValue();
14741 SDValue Src = N->getOperand(0);
14769 SDValue Src = N->getOperand(0);
14774 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Src);
14787 if (SDValue Widened = widenLoad(cast<LoadSDNode>(N), DCI))
14924 SmallVector<SDValue, 12> Ops;
14944 DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1));
14951 SDValue(NewNode, 0));
14965 SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
14966 SDNode *NewUser = DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
14968 DAG.ReplaceAllUsesWith(SDValue(User, 0), SDValue(NewUser, 0));
14986 static bool isFrameIndexOp(SDValue Op) {
15000 SDValue SrcVal = Node->getOperand(2);
15007 SDValue VReg = DAG.getRegister(
15011 SDValue ToVReg
15013 SDValue(Glued, Glued ? Glued->getNumValues() - 1 : 0));
15014 SDValue ToResultReg
15015 = DAG.getCopyToReg(ToVReg, SL, SDValue(DestReg, 0),
15023 SmallVector<SDValue, 8> Ops;
15031 Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
15064 SDValue Src0 = Node->getOperand(1);
15065 SDValue Src1 = Node->getOperand(3);
15066 SDValue Src2 = Node->getOperand(5);
15078 SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
15080 SDValue ImpDef = DAG.getCopyToReg(DAG.getEntryNode(), SDLoc(Node),
15081 UndefReg, Src0, SDValue());
15101 SmallVector<SDValue, 9> Ops(Node->op_begin(), Node->op_end());
15281 static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL,
15283 SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
15284 return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
15289 SDValue Ptr) const {
15295 const SDValue Ops0[] = {
15303 SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
15307 const SDValue Ops1[] = {
15323 SDValue Ptr, uint32_t RsrcDword1,
15325 SDValue PtrLo = DAG.getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
15326 SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
15328 PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
15333 SDValue DataLo = buildSMovImm32(DAG, DL,
15335 SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
15337 const SDValue Ops[] = {
15507 void SITargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15509 std::vector<SDValue> &Ops,
15523 bool SITargetLowering::getAsmOperandConstVal(SDValue Op, uint64_t &Val) const {
15557 bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
15589 bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
15734 void SITargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16044 bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
16425 ComputeConstraintToUse(TC, SDValue());
16449 bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
16450 SDValue N1) const {