Lines Matching defs:SDValue
2605 SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2610 return SDValue(Node, 0);
2640 bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
2659 bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
2673 bool X86::mayFoldIntoStore(SDValue Op) {
2677 bool X86::mayFoldIntoZeroExtend(SDValue Op) {
2750 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
2839 bool isFP, SDValue &LHS, SDValue &RHS,
3106 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
3164 SDValue C) const {
3209 bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
3304 bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
3320 bool X86TargetLowering::hasAndNot(SDValue Y) const {
3337 bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
3343 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
3475 bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
3779 bool X86::isZeroNode(SDValue Elt) {
3786 static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
3789 SmallVector<SDValue, 32> Ops;
3803 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
3810 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
3816 static SDValue getConstVector(ArrayRef<APInt> Bits, const APInt &Undefs,
3820 SmallVector<SDValue, 32> Ops;
3853 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
3857 static SDValue getConstVector(ArrayRef<APInt> Bits, MVT VT,
3864 static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
3873 SDValue Vec;
3893 static SDValue getSplitVectorSrc(SDValue LHS, SDValue RHS, bool AllowCommute) {
3898 return SDValue();
3900 SDValue Src = LHS.getOperand(0);
3902 return SDValue();
3911 return SDValue();
3914 static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3941 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
3951 static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
3959 static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
3965 static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
3985 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
3995 static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
4003 static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
4009 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
4017 static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
4041 static SDValue widenMaskVector(SDValue Vec, bool ZeroNewElements,
4051 static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops,
4061 SDValue Src = N->getOperand(0);
4062 SDValue Sub = N->getOperand(1);
4080 SDValue Lo = Src.getOperand(1);
4081 SDValue Hi = Sub;
4082 SmallVector<SDValue, 2> LoOps, HiOps;
4115 static SDValue isUpperSubvectorUndef(SDValue V, const SDLoc &DL,
4117 SmallVector<SDValue> SubOps;
4119 return SDValue();
4125 ArrayRef<SDValue> UpperOps(SubOps.begin() + HalfNumSubOps, SubOps.end());
4126 if (any_of(UpperOps, [](SDValue Op) { return !Op.isUndef(); }))
4127 return SDValue();
4130 ArrayRef<SDValue> LowerOps(SubOps.begin(), SubOps.begin() + HalfNumSubOps);
4137 SmallVector<SDValue> Ops;
4141 static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
4151 SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2);
4155 SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2);
4160 static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) {
4165 SmallVector<SDValue> LoOps(NumOps, SDValue());
4166 SmallVector<SDValue> HiOps(NumOps, SDValue());
4168 SDValue SrcOp = Op.getOperand(I);
4185 static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG,
4201 static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG,
4217 // SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
4219 SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
4220 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
4245 SmallVector<SDValue, 4> Subs;
4247 SmallVector<SDValue, 2> SubOps;
4248 for (SDValue Op : Ops) {
4261 static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT,
4262 ArrayRef<SDValue> Ops, SelectionDAG &DAG,
4269 auto MakeBroadcastOp = [&](SDValue Op, MVT OpVT, MVT DstVT) {
4275 return SDValue();
4278 return SDValue();
4288 return SDValue();
4298 SmallVector<SDValue> SrcOps(Ops.begin(), Ops.end());
4299 for (SDValue &Op : SrcOps) {
4306 if (SDValue BroadcastOp = MakeBroadcastOp(Op, OpVT, DstVT)) {
4316 SDValue Res = DAG.getNode(Opcode, DL, DstVT, SrcOps);
4325 static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
4329 SDValue Vec = Op.getOperand(0);
4330 SDValue SubVec = Op.getOperand(1);
4331 SDValue Idx = Op.getOperand(2);
4343 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
4364 SDValue Undef = DAG.getUNDEF(WideOpVT);
4368 SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
4395 [](SDValue V) { return V.isUndef(); })) {
4427 SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
4449 SDValue CMask0 = DAG.getConstant(Mask0, dl, MVT::getIntegerVT(NumElems));
4450 SDValue VMask0 = DAG.getNode(ISD::BITCAST, dl, WideOpVT, CMask0);
4470 SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec,
4477 SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
4490 static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG,
4498 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth);
4505 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4509 SDValue Vec = DAG.getAllOnesConstant(dl, MVT::getVectorVT(MVT::i32, NumElts));
4513 static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT,
4514 SDValue In, SelectionDAG &DAG) {
4539 static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS,
4540 SDValue Mask, SelectionDAG &DAG) {
4578 static SDValue getVectorShuffle(SelectionDAG &DAG, EVT VT, const SDLoc &dl,
4579 SDValue V1, SDValue V2, ArrayRef<int> Mask) {
4582 SmallVector<SDValue> Ops(Mask.size(), DAG.getUNDEF(VT.getScalarType()));
4587 SDValue V = (M < NumElts) ? V1 : V2;
4599 static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
4600 SDValue V1, SDValue V2) {
4607 static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
4608 SDValue V1, SDValue V2) {
4617 static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
4618 const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS,
4658 SDValue Amt = DAG.getTargetConstant(EltSizeInBits, dl, MVT::i8);
4664 SDValue Mask = DAG.getConstant((1ULL << EltSizeInBits) - 1, dl, OpVT);
4684 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
4689 SDValue V1 = IsZero
4699 static ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) {
4707 static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) {
4720 static const Constant *getTargetConstantFromNode(SDValue Op) {
4732 static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
4903 SDValue Ptr = MemIntr->getBasePtr();
4924 SDValue Ptr = MemIntr->getBasePtr();
5063 bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs) {
5090 static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5112 static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
5120 if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
5149 SmallVector<SDValue, 2> CatOps;
5151 for (SDValue &CatOp : CatOps) {
5152 SDValue NotCat = IsNOT(CatOp, DAG);
5153 if (!NotCat) return SDValue();
5158 return SDValue();
5226 static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
5227 SmallVectorImpl<SDValue> &Ops,
5383 SDValue MaskNode = N.getOperand(1);
5396 SDValue MaskNode = N.getOperand(1);
5449 SDValue MaskNode = N.getOperand(2);
5450 SDValue CtrlNode = N.getOperand(3);
5466 SDValue MaskNode = N.getOperand(2);
5478 SDValue MaskNode = N.getOperand(0);
5493 SDValue MaskNode = N.getOperand(1);
5533 static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
5534 SmallVectorImpl<SDValue> &Ops,
5549 SDValue V1, SDValue V2,
5577 SDValue V = M < Size ? V1 : V2;
5588 SDValue Op = V.getOperand(M / Scale);
5614 SDValue Op = V.getOperand((M * Scale) + j);
5631 static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
5632 SmallVectorImpl<SDValue> &Ops,
5643 SDValue V1 = Ops[0];
5644 SDValue V2 = IsUnary ? V1 : Ops[1];
5680 SDValue V = M < Size ? V1 : V2;
5707 SDValue Vec = V.getOperand(0);
5767 SDValue Cond, bool IsBLENDV = false) {
5795 static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
5796 SmallVectorImpl<SDValue> &Inputs,
5805 static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
5807 SmallVectorImpl<SDValue> &Ops,
5841 SDValue N0 = N.getOperand(0);
5842 SDValue N1 = N.getOperand(1);
5864 SDValue N0 = peekThroughBitcasts(N.getOperand(0));
5865 SDValue N1 = peekThroughBitcasts(N.getOperand(1));
5870 SmallVector<SDValue, 2> SrcInputs0, SrcInputs1;
5902 SDValue Src = N.getOperand(0);
5903 SDValue Sub = N.getOperand(1);
5908 SDValue SubBC = peekThroughBitcasts(Sub);
5914 SDValue SubBCSrc = SubBC.getOperand(0);
5951 SmallVector<SDValue, 2> SubInputs;
5952 SDValue SubSrc = peekThroughOneUseBitcasts(Sub);
5963 if (llvm::any_of(SubInputs, [SubVT](SDValue SubInput) {
6007 SDValue Scl = N.getOperand(Opcode == ISD::SCALAR_TO_VECTOR ? 0 : 1);
6044 SDValue SrcExtract;
6054 SDValue SrcVec = SrcExtract.getOperand(0);
6085 SDValue N0 = N.getOperand(0);
6086 SDValue N1 = N.getOperand(1);
6143 SDValue Cond = N.getOperand(0);
6152 SDValue Src = N.getOperand(0);
6216 SDValue Src = N.getOperand(0);
6230 SDValue Src = N.getOperand(0);
6256 SDValue Src = N.getOperand(0);
6277 static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
6280 SmallVector<SDValue, 16> UsedInputs;
6321 static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
6322 SmallVectorImpl<SDValue> &Inputs,
6347 static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
6348 SmallVectorImpl<SDValue> &Inputs,
6357 static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
6372 static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT,
6381 return SDValue();
6383 SDValue Ptr = DAG.getMemBasePlusOffset(Mem->getBasePtr(),
6386 SDValue Ops[] = {Mem->getChain(), Ptr};
6387 SDValue BcstLd = DAG.getMemIntrinsicNode(
6391 DAG.makeEquivalentMemoryOrdering(SDValue(Mem, 1), BcstLd.getValue(1));
6397 static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
6400 return SDValue(); // Limit search depth.
6413 SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1);
6423 SmallVector<SDValue, 16> ShuffleOps;
6425 return SDValue();
6435 SDValue Src = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
6441 SDValue Vec = Op.getOperand(0);
6442 SDValue Sub = Op.getOperand(1);
6462 SDValue Src = Op.getOperand(0);
6469 SDValue Src = Op.getOperand(0);
6473 return SDValue();
6494 return SDValue();
6498 static SDValue LowerBuildVectorAsInsert(SDValue Op, const SDLoc &DL,
6509 SDValue V;
6540 static SDValue LowerBuildVectorv16i8(SDValue Op, const SDLoc &DL,
6546 return SDValue();
6553 SDValue V;
6562 SDValue Elt = DAG.getZExtOrTrunc(Op.getOperand(I), DL, MVT::i32);
6579 SDValue Elt;
6588 SDValue NextElt = Op.getOperand(i + 1);
6622 static SDValue LowerBuildVectorv8i16(SDValue Op, const SDLoc &DL,
6628 return SDValue();
6636 static SDValue LowerBuildVectorv4x32(SDValue Op, const SDLoc &DL,
6651 SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
6653 SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
6654 SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
6661 SDValue Elt = Op.getOperand(i);
6670 SDValue FirstNonZero;
6675 SDValue Elt = Op.getOperand(i);
6678 return SDValue();
6682 return SDValue();
6690 SDValue V1 = FirstNonZero.getOperand(0);
6694 SDValue Elt;
6714 SDValue VZeroOrUndef = (Zeroable == Undefs)
6724 return SDValue();
6726 SDValue V2 = Elt.getOperand(0);
6728 V1 = SDValue();
6735 SDValue Current = Op->getOperand(i);
6736 SDValue SrcVector = Current->getOperand(0);
6743 return SDValue();
6756 SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
6762 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
6770 SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8);
6774 static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
6781 SDValue Ptr = LD->getBasePtr();
6783 return SDValue();
6786 return SDValue();
6799 return SDValue();
6805 SDValue Chain = LD->getChain();
6814 return SDValue();
6823 return SDValue();
6825 return SDValue();
6837 SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
6845 return SDValue();
6849 static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
6875 SDValue Src = Elt.getOperand(0);
6896 static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
6901 return SDValue();
6916 SDValue Elt = peekThroughBitcasts(Elts[i]);
6918 return SDValue();
6932 return SDValue();
6935 return SDValue();
6938 return SDValue();
6956 SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
6970 return SDValue();
7007 SDValue NewLd =
7030 return SDValue();
7036 return SDValue();
7058 SDValue V = CreateLoad(VT, LDBase);
7059 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
7072 SDValue HalfLD =
7095 SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
7096 SDValue ResNode = DAG.getMemIntrinsicNode(
7122 SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(EltBaseVT));
7126 SDValue Elt = peekThroughBitcasts(Elts[i]);
7150 if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
7152 SDValue Broadcast = RepeatLoad;
7162 return SDValue();
7172 return SDValue();
7178 static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,
7182 SmallVector<SDValue, 64> Elts;
7184 if (SDValue Elt = getShuffleScalarElt(Op, i, DAG, 0)) {
7188 return SDValue();
7277 /// or SDValue() otherwise.
7278 static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
7286 return SDValue();
7295 SDValue Ld;
7297 SmallVector<SDValue, 16> Sequence;
7316 [](SDValue V) { return !V || isNullConstantOrUndef(V); });
7317 SDValue Op0 = Sequence[0];
7321 SDValue BOperand = Op0.getOpcode() == ISD::BITCAST
7333 SDValue Bcst = DAG.getNode(X86ISD::VBROADCASTM, dl, BcstVT, BOperand);
7353 return SDValue();
7362 SDValue CP = DAG.getConstantPool(C, PVT);
7367 SDValue Ops[] = {DAG.getEntryNode(), CP};
7370 SDValue Brdcst =
7378 SDValue VCP = DAG.getConstantPool(VecC, PVT);
7383 SDValue Ops[] = {DAG.getEntryNode(), VCP};
7398 return SDValue();
7401 return SDValue();
7414 return SDValue();
7452 SDValue CP =
7457 SDValue Ops[] = {DAG.getEntryNode(), CP};
7472 return SDValue();
7476 return SDValue();
7482 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
7483 SDValue BCast =
7486 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
7496 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
7497 SDValue BCast =
7500 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
7508 return SDValue();
7516 static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
7517 SDValue ExtIdx) {
7533 SDValue ShuffleVec = SVOp->getOperand(0);
7546 static SDValue buildFromShuffleMostly(SDValue Op, const SDLoc &DL,
7553 return SDValue();
7556 SDValue VecIn1;
7557 SDValue VecIn2;
7570 return SDValue();
7576 SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
7577 SDValue ExtIdx = Op.getOperand(i).getOperand(1);
7581 return SDValue();
7586 return SDValue();
7595 return SDValue();
7605 return SDValue();
7608 SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
7618 static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG,
7623 SmallVector<SDValue, 16> NewOps;
7627 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
7632 static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
7649 SDValue In = Op.getOperand(idx);
7670 SDValue Cond = Op.getOperand(SplatIdx);
7678 SDValue Select = DAG.getSelect(dl, MVT::i32, Cond,
7685 SDValue Select = DAG.getSelect(dl, ImmVT, Cond,
7696 SDValue DstVec;
7699 SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32);
7700 SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32);
7706 SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT);
7759 SDValue &V0, SDValue &V1) {
7775 SDValue Op = N->getOperand(i + BaseIdx);
7791 SDValue Op0 = Op.getOperand(0);
7792 SDValue Op1 = Op.getOperand(1);
7823 SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
7870 static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
7879 SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
7880 SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
7881 SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
7882 SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
7885 SDValue LO = DAG.getUNDEF(NewVT);
7886 SDValue HI = DAG.getUNDEF(NewVT);
7913 SDValue &Opnd0, SDValue &Opnd1,
7922 SDValue InVec0 = DAG.getUNDEF(VT);
7923 SDValue InVec1 = DAG.getUNDEF(VT);
7933 SDValue Op = BV->getOperand(i);
7944 SDValue Op0 = Op.getOperand(0);
7945 SDValue Op1 = Op.getOperand(1);
8040 SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
8066 static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
8070 SDValue Opnd0, Opnd1;
8075 return SDValue();
8080 SDValue Opnd2;
8088 return SDValue();
8098 SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1);
8099 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1);
8107 unsigned &HOpcode, SDValue &V0, SDValue &V1) {
8125 SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j);
8147 SDValue Op0 = Op.getOperand(0);
8148 SDValue Op1 = Op.getOperand(1);
8166 SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
8195 static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
8197 unsigned HOpcode, SDValue V0, SDValue V1) {
8225 SDValue Half = DAG.getNode(HOpcode, DL, HalfVT, V0, V1);
8233 static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL,
8238 count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
8240 return SDValue();
8251 SDValue V0, V1;
8258 return SDValue();
8273 SDValue InVec0, InVec1;
8275 SDValue InVec2, InVec3;
8299 return SDValue();
8304 SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
8305 SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
8330 return SDValue();
8335 return SDValue();
8345 return SDValue();
8348 static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
8357 static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL,
8369 return SDValue();
8375 return SDValue();
8387 return SDValue();
8389 return SDValue();
8393 SmallVector<SDValue, 4> LHSElts, RHSElts;
8394 for (SDValue Elt : Op->ops()) {
8395 SDValue LHS = Elt.getOperand(0);
8396 SDValue RHS = Elt.getOperand(1);
8400 return SDValue();
8405 return SDValue();
8416 if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
8417 return SDValue();
8419 SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
8420 SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
8421 SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);
8434 static SDValue materializeVectorConstant(SDValue Op, const SDLoc &DL,
8453 return SDValue();
8459 static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
8494 SDValue NewSrcVec =
8498 return SDValue();
8503 return SDValue();
8506 auto ScaleIndices = [&DAG](SDValue Idx, uint64_t Scale) {
8577 SDValue LoSrc = extract128BitVector(SrcVec, 0, DAG, DL);
8578 SDValue HiSrc = extract128BitVector(SrcVec, 16, DAG, DL);
8579 SDValue LoIdx = extract128BitVector(IndicesVec, 0, DAG, DL);
8580 SDValue HiIdx = extract128BitVector(IndicesVec, 16, DAG, DL);
8586 SDValue Lo = extract128BitVector(SrcVec, 0, DAG, DL);
8587 SDValue Hi = extract128BitVector(SrcVec, 16, DAG, DL);
8588 SDValue LoLo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Lo);
8589 SDValue HiHi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Hi, Hi);
8591 ArrayRef<SDValue> Ops) {
8595 SDValue Idx = Ops[2];
8602 SDValue Ops[] = {LoLo, HiHi, IndicesVec};
8625 SDValue LoLo = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
8627 SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
8635 SDValue Res = DAG.getSelectCC(
8652 SDValue Res = createVariablePermute(WidenSrcVT, SrcVec, IndicesVec, DL,
8659 SDValue LoLo =
8661 SDValue HiHi =
8671 SDValue Res = DAG.getSelectCC(
8696 return SDValue();
8710 SDValue Res = Opcode == X86ISD::VPERMV
8727 static SDValue
8728 LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
8731 SDValue SrcVec, IndicesVec;
8736 SDValue Op = V.getOperand(Idx);
8738 return SDValue();
8746 return SDValue();
8747 SDValue ExtractedIndex = Op->getOperand(1);
8753 return SDValue();
8761 return SDValue();
8765 return SDValue();
8772 SDValue
8773 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
8789 if (SDValue VectorCst = materializeVectorConstant(Op, dl, DAG, Subtarget))
8799 SmallSet<SDValue, 8> Values;
8802 SDValue Elt = Op.getOperand(i);
8844 SmallVector<SDValue, 16> Elts(NumElems, DAG.getUNDEF(OpEltVT));
8856 SDValue EltsBV = DAG.getBuildVector(VT, dl, Elts);
8857 SDValue FrozenUndefElt = DAG.getFreeze(DAG.getUNDEF(OpEltVT));
8858 SDValue FrozenUndefBV = DAG.getSplatBuildVector(VT, dl, FrozenUndefElt);
8879 SDValue NewBV =
8885 if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG))
8887 if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG))
8889 if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG))
8891 if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
8913 SDValue VarElt;
8914 SDValue InsIndex;
8916 SDValue Elt = Op.getOperand(i);
8929 SDValue DAGConstVec = DAG.getConstantPool(CV, VT);
8937 SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG);
8940 SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
8954 SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
8961 SDValue Item = Op.getOperand(Idx);
9006 return SDValue();
9027 SDValue Item = Op.getOperand(Idx);
9031 return SDValue();
9037 return SDValue();
9039 if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, dl, DAG, Subtarget))
9044 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
9045 if (SDValue LD =
9054 SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
9056 auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef<SDValue> Ops) {
9067 SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2),
9082 SDValue Lower =
9084 SDValue Upper = DAG.getBuildVector(
9096 SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
9100 return SDValue();
9105 if (SDValue V = LowerBuildVectorv16i8(Op, dl, NonZeroMask, NumNonZero,
9110 if (SDValue V = LowerBuildVectorv8i16(Op, dl, NonZeroMask, NumNonZero,
9116 if (SDValue V = LowerBuildVectorv4x32(Op, dl, DAG, Subtarget))
9121 SmallVector<SDValue, 8> Ops(NumElems);
9162 if (SDValue Sh = buildFromShuffleMostly(Op, dl, DAG))
9167 SDValue Result;
9184 SmallVector<SDValue, 8> Ops(NumElems);
9214 static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9228 SDValue SubVec = Op.getOperand(i);
9251 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
9253 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
9259 SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl)
9281 static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
9294 SDValue SubVec = Op.getOperand(i);
9312 SDValue SubVec = Op.getOperand(Idx);
9324 SDValue Vec = Zeros ? DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT);
9328 SDValue SubVec = Op.getOperand(Idx);
9337 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
9339 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
9349 SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT,
9356 static SDValue LowerCONCAT_VECTORS(SDValue Op,
9562 static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
9623 SDValue V1 = SDValue(),
9624 SDValue V2 = SDValue()) {
9634 SDValue MaskV = MaskIdx < Size ? V1 : V2;
9635 SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
9657 SDValue V1 = SDValue(),
9658 SDValue V2 = SDValue()) {
9673 V1 = SDValue();
9676 V2 = SDValue();
9690 SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
9700 SDValue MaskV = MaskIdx < Size ? V1 : V2;
9701 SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
9798 static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
9836 static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
9837 ArrayRef<int> Mask, SDValue V1,
9838 SDValue V2, const APInt &Zeroable,
9850 SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
9852 SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
9854 SDValue V;
9867 SDValue SrcV = (M >= Size ? V2 : V1);
9869 return SDValue();
9875 return SDValue();
9889 static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
9894 static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,
9896 ArrayRef<int> Mask, SDValue &V1,
9897 SDValue &V2, SelectionDAG &DAG,
9902 return SDValue();
9906 SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
9910 SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
9912 SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
9913 SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
9917 static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
10006 static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,
10007 ArrayRef<int> Mask, SDValue V1, SDValue V2,
10028 return SDValue();
10033 static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,
10034 ArrayRef<int> Mask, SDValue V1,
10035 SDValue V2, SelectionDAG &DAG) {
10046 return SDValue();
10097 static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src,
10107 return SDValue();
10115 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
10121 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
10129 SDValue NewSrc = widenSubVector(Src, ZeroUppers, Subtarget, DAG, DL, 512);
10135 SDValue Trunc = DAG.getNode(X86ISD::VTRUNC, DL, TruncVT, Src);
10158 static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
10159 SDValue V2, ArrayRef<int> Mask,
10165 return SDValue();
10180 SDValue Src = peekThroughBitcasts(V1);
10192 return SDValue();
10194 return SDValue();
10198 return SDValue();
10204 return SDValue();
10208 static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
10209 SDValue V2, ArrayRef<int> Mask,
10216 return SDValue();
10246 auto IsCheapConcat = [&](SDValue Lo, SDValue Hi) {
10266 SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
10283 return SDValue();
10362 static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
10372 auto MatchPACK = [&](SDValue N1, SDValue N2, MVT PackVT) {
10433 static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
10434 SDValue V1, SDValue V2, SelectionDAG &DAG,
10443 return SDValue();
10450 return SDValue();
10460 SDValue Res;
10482 static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
10483 SDValue V2, ArrayRef<int> Mask,
10489 SDValue Zero, AllOnes;
10509 SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
10510 SDValue V;
10515 return SDValue(); // Not a blend.
10519 return SDValue(); // Can only let one input through the mask.
10524 return SDValue(); // No non-zeroable elements!
10526 SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
10529 SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
10538 static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
10539 SDValue V2, ArrayRef<int> Mask,
10543 SDValue Zero = DAG.getConstant(0, DL, EltVT);
10544 SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
10545 SmallVector<SDValue, 16> MaskOps;
10548 return SDValue(); // Shuffled input!
10552 SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);
10556 static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
10557 SDValue PreservedSrc,
10561 static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2,
10645 static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
10646 SDValue V2, ArrayRef<int> Original,
10655 return SDValue();
10701 SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
10703 SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
10718 if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
10724 SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
10730 if (SDValue BitBlend =
10761 SmallVector<SDValue, 32> VSELECTMask;
10785 if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
10793 SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
10806 static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
10807 SDValue V1, SDValue V2,
10825 return SDValue(); // Can't blend in the needed input!
10834 return SDValue();
10836 SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
10845 static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
10846 SDValue V1, SDValue V2,
10855 SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
10865 SDValue &Op = Ops[Elt & 1];
10872 return SDValue();
10888 return SDValue();
10915 SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops);
10928 static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
10929 SDValue V1, SDValue V2,
10938 return SDValue();
10961 return SDValue();
10974 return SDValue();
10996 if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
11003 return SDValue();
11034 return SDValue();
11039 static SDValue lowerShuffleAsByteRotateAndPermute(
11040 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
11045 return SDValue();
11049 return SDValue();
11088 return SDValue();
11091 return SDValue();
11094 auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) {
11096 SDValue Rotate = DAG.getBitcast(
11120 return SDValue();
11156 static SDValue lowerShuffleAsDecomposedShuffleMerge(
11157 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
11186 &DAG](SDValue &Input,
11221 if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
11232 if (SDValue UnpackPerm =
11235 if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute(
11239 if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
11243 if (SDValue PermUnpack = lowerShuffleAsPermuteAndUnpack(
11294 static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1,
11303 return SDValue();
11309 return SDValue();
11316 return SDValue();
11321 SDValue SHL = DAG.getNode(X86ISD::VSHLI, DL, RotateVT, V1,
11323 SDValue SRL = DAG.getNode(X86ISD::VSRLI, DL, RotateVT, V1,
11325 SDValue Rot = DAG.getNode(ISD::OR, DL, RotateVT, SHL, SRL);
11329 SDValue Rot =
11338 static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2,
11350 SDValue Lo, Hi;
11376 SDValue MaskV = M < NumElts ? V1 : V2;
11381 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
11423 static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
11445 static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
11446 SDValue V2, ArrayRef<int> Mask,
11451 SDValue Lo = V1, Hi = V2;
11454 return SDValue();
11482 SDValue LoShift =
11485 SDValue HiShift =
11502 static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1,
11503 SDValue V2, ArrayRef<int> Mask,
11514 SDValue Lo = V1, Hi = V2;
11529 return SDValue();
11532 SDValue Src = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
11541 SDValue Src = Mask[0] < (int)NumElts ? V1 : V2;
11549 return SDValue();
11553 static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1,
11554 SDValue V2, ArrayRef<int> Mask,
11566 return SDValue();
11571 return SDValue();
11577 return SDValue();
11579 SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
11612 return SDValue();
11702 static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
11703 SDValue V2, ArrayRef<int> Mask,
11711 SDValue V = V1;
11726 return SDValue();
11729 return SDValue();
11741 static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
11762 SDValue Src;
11768 SDValue &V = (M < Size ? V1 : V2);
11797 static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
11809 SDValue Base;
11825 SDValue Insert;
11863 static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
11864 SDValue V2, ArrayRef<int> Mask,
11878 return SDValue();
11890 static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
11891 const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
11911 auto ShuffleOffset = [&](SDValue V) {
11929 return SDValue();
11973 SDValue Lo = DAG.getBitcast(
11982 SDValue Hi = DAG.getBitcast(
11995 SDValue PSHUFBMask[16];
12031 SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
12054 static SDValue lowerShuffleAsZeroOrAnyExtend(
12055 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
12068 auto Lower = [&](int Scale) -> SDValue {
12069 SDValue InputV;
12080 return SDValue();
12089 SDValue V = M < NumElements ? V1 : V2;
12095 return SDValue(); // Flip-flopping inputs.
12102 return SDValue();
12107 return SDValue();
12110 return SDValue(); // Non-consecutive strided elements.
12118 return SDValue();
12123 return SDValue();
12139 if (SDValue V = Lower(NumElements / NumExtElements))
12145 return SDValue();
12152 return SDValue();
12157 return SDValue();
12160 if (SDValue V = CanZExtLowHalf()) {
12167 return SDValue();
12173 static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
12183 return SDValue();
12189 SDValue S = V.getOperand(Idx);
12194 return SDValue();
12201 static bool isShuffleFoldableLoad(SDValue V) {
12216 static SDValue lowerShuffleAsElementInsertion(
12217 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
12226 return SDValue();
12244 return SDValue();
12252 SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
12262 return SDValue();
12273 SDValue BitMask = getConstVector(Bits, VT, DAG, DL);
12285 return SDValue();
12293 return SDValue();
12295 return SDValue();
12312 return SDValue();
12342 static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0,
12357 return SDValue();
12364 return SDValue();
12375 return SDValue();
12377 SDValue Scalar = V0.getOperand(V0BroadcastIdx);
12431 static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
12432 SDValue N1, ArrayRef<int> Mask,
12444 return SDValue();
12446 SDValue WideVec = N0.getOperand(0);
12449 return SDValue();
12460 return SDValue();
12467 return SDValue();
12473 SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT),
12485 static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
12486 SDValue V2, ArrayRef<int> Mask,
12493 return SDValue();
12506 return SDValue();
12516 SDValue V = V1;
12540 SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
12569 if (SDValue TruncBroadcast = lowerShuffleAsTruncBroadcast(
12581 return SDValue();
12590 SDValue BaseAddr = Ld->getOperand(1);
12594 SDValue NewAddr =
12602 SDValue Ops[] = {Ld->getChain(), NewAddr};
12617 return SDValue();
12623 return SDValue();
12627 return SDValue();
12631 return SDValue();
12679 static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2,
12690 auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
12767 static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
12776 return SDValue();
12790 static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
12791 const APInt &Zeroable, SDValue V1, SDValue V2,
12800 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2f64, V1, V2,
12827 if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
12832 if (SDValue Insertion = lowerShuffleAsElementInsertion(
12839 if (SDValue Insertion = lowerShuffleAsElementInsertion(
12847 if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
12855 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
12860 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
12874 static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
12875 const APInt &Zeroable, SDValue V1, SDValue V2,
12884 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2i64, V1, V2,
12907 if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
12911 if (SDValue Shift =
12918 if (SDValue Insertion = lowerShuffleAsElementInsertion(
12924 if (SDValue Insertion = lowerShuffleAsElementInsertion(
12932 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
12937 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
12944 if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v2i64, V1, V2, Mask,
12948 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask,
12974 static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
12975 ArrayRef<int> Mask, SDValue V1,
12976 SDValue V2, SelectionDAG &DAG) {
12977 SDValue LowV = V1, HighV = V2;
13066 static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
13067 const APInt &Zeroable, SDValue V1, SDValue V2,
13075 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
13083 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f32, V1, V2,
13118 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
13125 if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
13134 if (SDValue V = lowerShuffleAsElementInsertion(
13140 if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
13144 if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1,
13159 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
13170 static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
13171 const APInt &Zeroable, SDValue V1, SDValue V2,
13181 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask,
13189 if (SDValue Shift =
13194 if (SDValue Rotate =
13202 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
13224 if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
13228 if (SDValue Shift =
13235 if (SDValue V = lowerShuffleAsElementInsertion(
13243 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
13247 if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
13252 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
13259 if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask,
13263 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
13279 if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2,
13289 SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
13290 SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2);
13291 SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask);
13311 static SDValue lowerV8I16GeneralSingleInputShuffle(
13312 const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
13805 static SDValue lowerShuffleAsBlendOfPSHUFBs(
13806 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
13815 SmallVector<SDValue, 64> V1Mask(NumBytes, DAG.getUNDEF(MVT::i8));
13816 SmallVector<SDValue, 64> V2Mask(NumBytes, DAG.getUNDEF(MVT::i8));
13846 SDValue V;
13868 static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
13869 const APInt &Zeroable, SDValue V1, SDValue V2,
13878 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i16, V1, V2, Mask,
13883 if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
13891 if (SDValue Shift =
13897 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
13902 if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask,
13907 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
13911 if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
13916 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask,
13931 if (SDValue Shift =
13938 if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
13944 if (SDValue V = lowerShuffleAsElementInsertion(
13952 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
13956 if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
13961 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
13965 if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
13970 if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
13975 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask,
13979 if (SDValue BitBlend =
13984 if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v8i16, V1, V2, Mask,
13997 SDValue V1V2 = concatSubVectors(V1, V2, DAG, DL);
14006 SmallVector<SDValue, 4> DWordClearOps(4,
14010 SDValue DWordClearMask =
14018 SDValue ShAmt = DAG.getTargetConstant(16, DL, MVT::i8);
14029 SDValue Result = DAG.getNode(PackOpc, DL, MVT::v8i16, V1, V2);
14053 if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
14072 static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
14073 const APInt &Zeroable, SDValue V1, SDValue V2,
14084 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f16, V1, V2,
14089 if (SDValue V = lowerShuffleAsElementInsertion(
14103 static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT,
14104 ArrayRef<int> Mask, SDValue V1, SDValue V2,
14108 SDValue MaskNode;
14128 SDValue Result;
14147 static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
14148 const APInt &Zeroable, SDValue V1, SDValue V2,
14156 if (SDValue Shift =
14162 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, Mask,
14167 if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG,
14172 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v16i8, V1, V2, Mask,
14177 if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
14181 if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
14187 if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
14196 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i8, V1, V2,
14201 if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask,
14205 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
14223 auto tryToWidenViaDuplication = [&]() -> SDValue {
14225 return SDValue();
14258 return SDValue();
14300 if (SDValue V = tryToWidenViaDuplication())
14304 if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
14309 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
14313 if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v16i8, V1, V2, Mask,
14341 SDValue PSHUFB = lowerShuffleAsBlendOfPSHUFBs(
14349 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask,
14361 if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(
14372 SDValue MaskNode = getConstVector(Mask, MVT::v16i8, DAG, DL, true);
14378 if (SDValue V = lowerShuffleAsByteRotateAndPermute(
14388 if (SDValue V = lowerShuffleAsElementInsertion(
14392 if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
14410 SmallVector<SDValue, 8> WordClearOps(8, DAG.getConstant(0, DL, MVT::i16));
14413 SDValue WordClearMask = DAG.getBuildVector(MVT::v8i16, DL, WordClearOps);
14421 SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1,
14451 SDValue V = V1;
14459 SDValue VLoHalf, VHiHalf;
14483 SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
14491 SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
14492 SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
14501 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
14502 MVT VT, SDValue V1, SDValue V2,
14539 static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
14540 SDValue V2, ArrayRef<int> Mask,
14557 auto SplitVector = [&](SDValue V) {
14558 SDValue LoV, HiV;
14564 SDValue LoV1, HiV1, LoV2, HiV2;
14630 SDValue V1Blend, V2Blend;
14653 return SDValue();
14655 SDValue Lo = HalfBlend(LoMask);
14656 SDValue Hi = HalfBlend(HiMask);
14668 static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
14669 SDValue V2, ArrayRef<int> Mask,
14722 static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT,
14723 SDValue V1, SDValue V2,
14744 SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask);
14745 SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask);
14758 static SDValue lowerShuffleAsLanePermuteAndPermute(
14759 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
14771 auto getSublanePermute = [&](int NumSublanes) -> SDValue {
14804 return SDValue();
14826 return SDValue();
14833 return SDValue();
14835 SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);
14841 if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes))
14846 return SDValue();
14849 if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes * 2))
14855 return SDValue();
14881 static SDValue lowerShuffleAsLanePermuteAndShuffle(
14882 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
14932 SDValue Flipped = DAG.getBitcast(PVT, V1);
14940 static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
14941 SDValue V2, ArrayRef<int> Mask,
14954 if (SDValue BcstLd = getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL,
14961 return SDValue();
14968 return SDValue();
14976 SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
14988 if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
15004 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
15060 static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
15061 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
15066 return SDValue();
15094 return SDValue();
15144 return SDValue();
15164 return SDValue();
15168 return SDValue();
15174 return SDValue();
15187 SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
15193 return SDValue();
15204 SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
15210 return SDValue();
15284 static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
15298 SDValue V = (HalfIdx < 2 ? V1 : V2);
15305 SDValue Half1 = getHalfVector(HalfIdx1);
15306 SDValue Half2 = getHalfVector(HalfIdx2);
15307 SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
15309 SDValue Op0 = V;
15310 SDValue Op1 = DAG.getUNDEF(HalfVT);
15324 static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
15325 SDValue V2, ArrayRef<int> Mask,
15333 return SDValue();
15344 SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
15354 SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
15363 return SDValue();
15392 return SDValue();
15398 return SDValue();
15402 return SDValue();
15410 return SDValue();
15418 return SDValue();
15421 return SDValue();
15428 return SDValue();
15435 static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
15436 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
15474 SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask);
15485 return SDValue();
15494 return SDValue();
15498 return SDValue();
15526 return SDValue();
15572 return SDValue();
15604 return SDValue();
15606 SDValue RepeatedShuffle =
15628 if (SDValue Shuffle = ShuffleSubLanes(Scale))
15631 return SDValue();
15634 static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
15679 static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1,
15680 SDValue V2, ArrayRef<int> Mask,
15691 return SDValue();
15706 static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT,
15707 SDValue V1, SDValue V2,
15715 return SDValue();
15719 return SDValue();
15729 SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2,
15754 static SDValue lowerShufflePairAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
15755 SDValue V1, SDValue V2,
15760 return SDValue();
15780 return SDValue();
15790 return SDValue();
15805 return SDValue();
15809 SDValue Unpckl = DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
15810 SDValue Unpckh = DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
15811 SDValue Perm1 = DAG.getNode(X86ISD::VPERM2X128, DL, VT, Unpckl, Unpckh,
15813 SDValue Perm2 = DAG.getNode(X86ISD::VPERM2X128, DL, VT, Unpckl, Unpckh,
15827 static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
15828 const APInt &Zeroable, SDValue V1, SDValue V2,
15835 if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable,
15841 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f64, V1, V2,
15865 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
15870 if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v4f64, V1, V2,
15880 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
15883 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
15888 if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
15913 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
15922 if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask(
15928 if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2,
15947 static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
15948 const APInt &Zeroable, SDValue V1, SDValue V2,
15956 if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable,
15960 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
15965 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask,
15971 if (SDValue Shift =
15997 if (SDValue Shift =
16004 if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i64, V1, V2, Mask,
16008 if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2,
16014 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask,
16019 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
16033 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
16038 if (SDValue V =
16047 if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
16060 static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
16061 const APInt &Zeroable, SDValue V1, SDValue V2,
16068 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
16073 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask,
16082 if (SDValue R = splitAndLowerShuffle(DL, MVT::v8f32, V1, V2, Mask, DAG,
16086 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask,
16108 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
16118 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
16126 SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
16130 SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
16140 if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
16146 if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2,
16156 if (SDValue V = lowerShufflePairAsUNPCKAndPermute(DL, MVT::v8f32, V1, V2,
16182 static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
16183 const APInt &Zeroable, SDValue V1, SDValue V2,
16196 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask,
16203 if (SDValue V = lowerShufflePairAsUNPCKAndPermute(DL, MVT::v8i32, V1, V2,
16215 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
16220 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask,
16226 if (SDValue Shift =
16231 if (SDValue Rotate =
16249 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
16254 if (SDValue Shift =
16260 if (SDValue Rotate =
16266 if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i32, V1, V2, Mask,
16270 if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2,
16276 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i32, V1, V2, Mask,
16282 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
16289 if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v8i32, Mask, V1, V2, DAG))
16294 SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
16302 SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
16303 SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
16304 SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
16311 if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
16324 static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
16325 const APInt &Zeroable, SDValue V1, SDValue V2,
16336 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
16341 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask,
16345 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
16350 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
16354 if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG,
16359 if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i16, V1, V2, Mask, Zeroable,
16364 if (SDValue Shift =
16370 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i16, V1, V2, Mask,
16376 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
16382 if (SDValue Rotate =
16388 if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v16i16, Mask, V1, V2, DAG))
16394 if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
16412 if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, V2,
16422 if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
16427 if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
16434 if (SDValue V = lowerShufflePairAsUNPCKAndPermute(DL, MVT::v16i16, V1, V2,
16447 static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
16448 const APInt &Zeroable, SDValue V1, SDValue V2,
16459 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask,
16464 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask,
16468 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
16473 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
16477 if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG,
16482 if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v32i8, V1, V2, Mask, Zeroable,
16487 if (SDValue Shift =
16493 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i8, V1, V2, Mask,
16499 if (SDValue Rotate =
16505 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
16514 if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v32i8, Mask, V1, V2, DAG))
16517 if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
16525 if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
16535 if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
16540 if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
16548 if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2,
16555 if (SDValue V = lowerShufflePairAsUNPCKAndPermute(DL, MVT::v32i8, V1, V2,
16569 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
16570 SDValue V1, SDValue V2, const APInt &Zeroable,
16579 if (SDValue Insertion = lowerShuffleAsElementInsertion(
16584 if (SDValue V =
16599 if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
16602 if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
16641 static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
16642 const APInt &Zeroable, SDValue V1, SDValue V2,
16655 return SDValue();
16663 SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
16676 SDValue SubVec =
16708 SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
16724 SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
16732 SDValue Op = Widened128Mask[i] >= 4 ? V2 : V1;
16737 return SDValue();
16747 static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
16748 const APInt &Zeroable, SDValue V1, SDValue V2,
16777 if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8f64, Mask, Zeroable, V1,
16781 if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
16785 if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask,
16789 if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2,
16793 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
16801 static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
16802 const APInt &Zeroable, SDValue V1, SDValue V2,
16826 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
16829 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
16837 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
16841 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
16847 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
16855 SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true);
16860 if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
16868 static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
16869 const APInt &Zeroable, SDValue V1, SDValue V2,
16878 if (SDValue Shift =
16904 if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8i64, Mask, Zeroable, V1,
16909 if (SDValue Shift =
16915 if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i64, V1, V2, Mask,
16921 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
16925 if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
16929 if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2,
16933 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
16941 static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
16942 const APInt &Zeroable, SDValue V1, SDValue V2,
16954 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
16960 if (SDValue Shift =
16965 if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i32, V1, Mask,
16983 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
16988 if (SDValue Shift =
16994 if (SDValue Rotate =
16999 if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v16i32, V1, V2, Mask,
17005 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i32, V1, V2, Mask,
17012 SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
17013 SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
17014 SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
17021 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
17026 if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2,
17030 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
17038 static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
17039 const APInt &Zeroable, SDValue V1, SDValue V2,
17050 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
17055 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
17059 if (SDValue V =
17064 if (SDValue Shift =
17070 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i16, V1, V2, Mask,
17076 if (SDValue Rotate =
17090 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
17094 if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2,
17102 static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
17103 const APInt &Zeroable, SDValue V1, SDValue V2,
17114 if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
17119 if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
17123 if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
17128 if (SDValue Shift =
17134 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v64i8, V1, V2, Mask,
17140 if (SDValue Rotate =
17145 if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v64i8, V1, V2, Mask,
17149 if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2,
17155 if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
17159 if (SDValue Result = lowerShuffleAsLanePermuteAndPermute(
17163 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
17170 if (SDValue V = lowerShuffleAsByteRotateAndPermute(DL, MVT::v64i8, V1, V2,
17184 if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
17200 static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
17201 MVT VT, SDValue V1, SDValue V2,
17214 if (SDValue Insertion = lowerShuffleAsElementInsertion(
17219 if (SDValue V =
17224 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
17231 if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
17234 if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
17274 static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
17275 MVT VT, SDValue V1, SDValue V2,
17280 return SDValue();
17296 return SDValue();
17300 return SDValue();
17305 SDValue Res = widenMaskVector(V1, false, Subtarget, DAG, DL);
17349 static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
17350 MVT VT, SDValue V1, SDValue V2,
17385 SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT,
17394 if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget,
17400 for (SDValue V : { V1, V2 }) {
17404 SDValue Res = widenMaskVector(V, false, Subtarget, DAG, DL);
17428 SDValue Op0 = V1.getOperand(0);
17429 SDValue Op1 = V1.getOperand(1);
17468 return SDValue();
17476 SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask);
17551 static bool canCombineAsMaskOperation(SDValue V,
17569 auto HasMaskOperation = [&](SDValue V) {
17604 static SDValue canonicalizeShuffleMaskWithHorizOp(
17605 MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
17616 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
17620 SDValue V1 = Op.getOperand(0);
17621 SDValue V2 = Op.getOperand(1);
17685 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask,
17720 SmallVector<SDValue> Ops = {V1, V2};
17725 if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
17758 static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
17761 SDValue Cond = Op.getOperand(0);
17762 SDValue LHS = Op.getOperand(1);
17763 SDValue RHS = Op.getOperand(2);
17774 return SDValue();
17777 SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
17778 SDValue Cond = Op.getOperand(0);
17779 SDValue LHS = Op.getOperand(1);
17780 SDValue RHS = Op.getOperand(2);
17796 return SDValue();
17800 if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
17812 return SDValue();
17819 return SDValue();
17827 SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
17838 return SDValue();
17875 return SDValue();
17884 SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS);
17890 static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
17892 SDValue Vec = Op.getOperand(0);
17893 SDValue Idx = Op.getOperand(1);
17898 return SDValue();
17910 SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec,
17922 return SDValue();
17927 return SDValue();
17928 SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
17936 return SDValue();
17941 static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
17943 SDValue Vec = Op.getOperand(0);
17946 SDValue Idx = Op.getOperand(1);
17966 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec);
17967 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx);
18020 SDValue
18021 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
18024 SDValue Vec = Op.getOperand(0);
18026 SDValue Idx = Op.getOperand(1);
18063 return SDValue();
18102 SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec,
18108 if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
18121 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
18133 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
18172 return SDValue();
18177 static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
18180 SDValue Vec = Op.getOperand(0);
18181 SDValue Elt = Op.getOperand(1);
18182 SDValue Idx = Op.getOperand(2);
18191 SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
18198 SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt);
18202 SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
18213 SDValue N0 = Op.getOperand(0);
18214 SDValue N1 = Op.getOperand(1);
18215 SDValue N2 = Op.getOperand(2);
18220 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVT,
18233 return SDValue();
18238 return SDValue();
18240 SDValue IdxExt = DAG.getZExtOrTrunc(N2, dl, IdxSVT);
18241 SDValue IdxSplat = DAG.getSplatBuildVector(IdxVT, dl, IdxExt);
18242 SDValue EltSplat = DAG.getSplatBuildVector(VT, dl, N1);
18244 SmallVector<SDValue, 16> RawIndices;
18247 SDValue Indices = DAG.getBuildVector(IdxVT, dl, RawIndices);
18255 return SDValue();
18267 SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType());
18268 SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType());
18269 SmallVector<SDValue, 8> CstVectorElts(NumElts, ZeroCst);
18271 SDValue CstVector = DAG.getBuildVector(VT, dl, CstVectorElts);
18281 SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
18298 SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
18315 SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1);
18323 SDValue V = extract128BitVector(N0, IdxVal, DAG, dl);
18410 return SDValue();
18413 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
18444 SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
18452 static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
18459 static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
18465 SDValue Vec = Op.getOperand(0);
18508 SDValue
18509 X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
18517 SDValue Result = DAG.getTargetConstantPool(
18532 SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
18540 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
18554 SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
18559 SDValue
18560 X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
18568 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
18583 SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
18610 SDValue Result;
18657 SDValue
18658 X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
18662 static SDValue
18663 GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
18664 SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
18669 SDValue TGA;
18676 return SDValue(*UI->use_begin()->use_begin(), 0);
18687 SDValue Ops[] = { Chain, TGA, *InGlue };
18690 SDValue Ops[] = { Chain, TGA };
18698 SDValue Glue = Chain.getValue(1);
18699 SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
18708 SDValue Offset =
18715 static SDValue
18718 SDValue InGlue;
18720 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
18729 static SDValue
18737 static SDValue
18744 static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
18754 SDValue Base;
18760 SDValue InGlue;
18761 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
18774 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
18777 SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
18784 static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
18793 SDValue ThreadPointer =
18817 SDValue TGA =
18820 SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
18838 SDValue
18839 X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
18887 SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
18890 SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);
18900 SDValue Chain = DAG.getEntryNode();
18903 SDValue Args[] = { Chain, Offset };
18930 SDValue Chain = DAG.getEntryNode();
18939 SDValue TlsArray = Subtarget.is64Bit()
18945 SDValue ThreadPointer =
18948 SDValue res;
18953 SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
18961 SDValue Scale =
18971 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
18974 SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
19011 static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
19012 SDValue Lo, Hi;
19019 static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, const SDLoc &dl,
19029 SDValue Src = Op.getOperand(OpNo);
19035 return SDValue();
19044 SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
19046 SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
19048 SDValue Chain = CvtVec.getValue(1);
19049 SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
19054 SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
19061 static SDValue LowerI64IntToFP16(SDValue Op, const SDLoc &dl, SelectionDAG &DAG,
19069 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
19074 return SDValue();
19080 SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
19082 SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
19084 SDValue Chain = CvtVec.getValue(1);
19085 SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
19090 SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, MVT::v2f16, InVec);
19121 static SDValue vectorizeExtractedCast(SDValue Cast, const SDLoc &DL,
19126 SDValue Extract = Cast.getOperand(0);
19130 return SDValue();
19133 SDValue VecOp = Extract.getOperand(0);
19139 return SDValue();
19155 SDValue VCast = DAG.getNode(Cast.getOpcode(), DL, ToVT, VecOp);
19163 static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
19167 SDValue CastToInt = CastToFP.getOperand(0);
19170 return SDValue();
19173 SDValue X = CastToInt.getOperand(0);
19176 return SDValue();
19182 return SDValue();
19203 SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
19204 SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, X);
19205 SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
19206 SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecVT, VCastToInt);
19210 static SDValue lowerINT_TO_FP_vXi64(SDValue Op, const SDLoc &DL,
19215 SDValue Src = Op->getOperand(IsStrict ? 1 : 0);
19231 SDValue Tmp = IsStrict ? DAG.getConstant(0, DL, MVT::v8i64)
19235 SDValue Res, Chain;
19255 return SDValue();
19257 SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64);
19258 SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
19259 SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64,
19262 SDValue IsNeg = DAG.getSetCC(DL, MVT::v4i64, Src, Zero, ISD::SETLT);
19263 SDValue SignSrc = DAG.getSelect(DL, MVT::v4i64, IsNeg, Sign, Src);
19264 SmallVector<SDValue, 4> SignCvts(4);
19265 SmallVector<SDValue, 4> Chains(4);
19267 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc,
19278 SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts);
19280 SDValue Slow, Chain;
19291 SDValue Cvt = DAG.getSelect(DL, MVT::v4f32, IsNeg, Slow, SignCvt);
19299 static SDValue promoteXINT_TO_FP(SDValue Op, const SDLoc &dl,
19302 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
19303 SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
19307 SDValue Rnd = DAG.getIntPtrConstant(0, dl);
19338 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
19342 SDValue Src = Op.getOperand(OpNo);
19343 SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
19356 if (SDValue Extract = vectorizeExtractedCast(Op, dl, DAG, Subtarget))
19359 if (SDValue R = lowerFPToIntToFP(Op, dl, DAG, Subtarget))
19378 return SDValue();
19393 if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, dl, DAG, Subtarget))
19395 if (SDValue V = LowerI64IntToFP16(Op, dl, DAG, Subtarget))
19400 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
19409 return SDValue();
19411 SDValue ValueToStore = Src;
19425 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
19427 std::pair<SDValue, SDValue> Tmp =
19436 std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(
19437 EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer,
19447 SDValue FILDOps[] = {Chain, Pointer};
19448 SDValue Result =
19459 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
19461 SDValue FSTOps[] = {Chain, Result, StackSlot};
19488 static SDValue LowerUINT_TO_FP_i64(SDValue Op, const SDLoc &dl,
19514 SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, Align(16));
19524 SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, Align(16));
19527 SDValue XR1 =
19529 SDValue CLod0 = DAG.getLoad(
19532 SDValue Unpck1 =
19535 SDValue CLod1 = DAG.getLoad(
19538 SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
19540 SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
19541 SDValue Result;
19547 SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
19556 static SDValue LowerUINT_TO_FP_i32(SDValue Op, const SDLoc &dl,
19561 SDValue Bias = DAG.getConstantFP(
19565 SDValue Load =
19572 SDValue Or = DAG.getNode(
19584 SDValue Chain = Op.getOperand(0);
19585 SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
19592 std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
19600 SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
19606 static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, const SDLoc &DL,
19610 return SDValue();
19614 SDValue N0 = Op.getOperand(IsStrict ? 1 : 0);
19621 return SDValue();
19625 SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other},
19627 SDValue Chain = Res.getValue(1);
19646 SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0);
19647 SDValue VBias = DAG.getConstantFP(
19649 SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn,
19659 static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
19663 SDValue V = Op->getOperand(IsStrict ? 1 : 0);
19683 SDValue Tmp =
19687 SDValue Res, Chain;
19706 SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i64, V);
19711 SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, Align(8));
19713 SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
19714 SDValue VBias = DAG.getMemIntrinsicNode(
19719 SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn,
19746 return SDValue();
19756 SDValue VecCstLow = DAG.getConstant(0x4b000000, DL, VecIntVT);
19758 SDValue VecCstHigh = DAG.getConstant(0x53000000, DL, VecIntVT);
19761 SDValue VecCstShift = DAG.getConstant(16, DL, VecIntVT);
19762 SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift);
19764 SDValue Low, High;
19768 SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow);
19769 SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
19776 SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
19777 SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift);
19783 SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
19785 SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask);
19793 SDValue VecCstFSub = DAG.getConstantFP(
19800 SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
19803 SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low);
19806 SDValue FHigh = DAG.getNode(ISD::STRICT_FSUB, DL, {VecFloatVT, MVT::Other},
19812 SDValue FHigh =
19817 static SDValue lowerUINT_TO_FP_vec(SDValue Op, const SDLoc &dl, SelectionDAG &DAG,
19820 SDValue N0 = Op.getOperand(OpNo);
19837 SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
19841 SDValue Src = Op.getOperand(OpNo);
19846 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
19850 return SDValue();
19863 if (SDValue Extract = vectorizeExtractedCast(Op, dl, DAG, Subtarget))
19882 if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, dl, DAG, Subtarget))
19884 if (SDValue V = LowerI64IntToFP16(Op, dl, DAG, Subtarget))
19899 return SDValue();
19902 SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64, 8);
19908 SDValue OffsetSlot =
19910 SDValue Store1 = DAG.getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign);
19911 SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
19913 std::pair<SDValue, SDValue> Tmp =
19922 SDValue ValueToStore = Src;
19929 SDValue Store =
19935 SDValue Ops[] = {Store, StackSlot};
19936 SDValue Fild =
19942 SDValue SignSet = DAG.getSetCC(
19948 SDValue FudgePtr =
19953 SDValue Zero = DAG.getIntPtrConstant(0, dl);
19954 SDValue Four = DAG.getIntPtrConstant(4, dl);
19955 SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero);
19959 SDValue Fudge = DAG.getExtLoad(
19972 SDValue Add =
19985 SDValue Add = DAG.getNode(Opc, dl, MVT::f80, Fild, Fudge);
19992 // just return an SDValue().
19996 SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
19998 SDValue &Chain) const {
20003 SDValue Value = Op.getOperand(IsStrict ? 1 : 0);
20010 return SDValue();
20037 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
20041 SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
20075 SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
20079 SDValue Cmp;
20099 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Cmp);
20100 SDValue Const63 = DAG.getConstant(63, DL, MVT::i8);
20103 SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp, ThreshVal,
20122 SDValue Ops[] = { Chain, StackSlot };
20135 SDValue Ops[] = { Chain, Value, StackSlot };
20136 SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
20140 SDValue Res = DAG.getLoad(Op.getValueType(), DL, FIST, StackSlot, MPI);
20150 static SDValue LowerAVXExtend(SDValue Op, const SDLoc &dl, SelectionDAG &DAG,
20153 SDValue In = Op.getOperand(0);
20194 SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In);
20202 SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
20203 SDValue Undef = DAG.getUNDEF(InVT);
20205 SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
20212 static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In,
20215 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In,
20217 SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In,
20221 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i16, Lo, Hi);
20225 static SDValue LowerZERO_EXTEND_Mask(SDValue Op, const SDLoc &DL,
20229 SDValue In = Op->getOperand(0);
20237 SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);
20263 SDValue One = DAG.getConstant(1, DL, WideVT);
20264 SDValue Zero = DAG.getConstant(0, DL, WideVT);
20266 SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);
20282 static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
20284 SDValue In = Op.getOperand(0);
20300 static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
20309 return SDValue();
20319 return SDValue();
20345 SDValue LHS = DAG.getBitcast(InVT, In);
20346 SDValue RHS = Subtarget.hasAVX512() ? DAG.getUNDEF(InVT) : LHS;
20347 SDValue Res = DAG.getNode(Opcode, DL, OutVT, LHS, RHS);
20354 SDValue Lo, Hi;
20360 if (SDValue Res =
20373 SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi);
20382 SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi);
20406 SDValue Res =
20414 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
20422 static SDValue truncateVectorWithPACKUS(EVT DstVT, SDValue In, const SDLoc &DL,
20430 static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL,
20442 static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
20443 SDValue In, const SDLoc &DL,
20448 return SDValue();
20459 return SDValue();
20470 return SDValue();
20477 return SDValue();
20481 return SDValue();
20507 return SDValue();
20526 return SDValue();
20532 static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
20541 return SDValue();
20546 SmallVector<SDValue> LowerOps;
20547 if (SDValue Lo = isUpperSubvectorUndef(In, DL, DAG)) {
20549 if (SDValue Res = LowerTruncateVecPackWithSignBits(DstHalfVT, Lo, DL,
20557 if (SDValue Src =
20561 return SDValue();
20566 static SDValue LowerTruncateVecPack(MVT DstVT, SDValue In, const SDLoc &DL,
20576 return SDValue();
20581 return SDValue();
20583 return SDValue();
20589 SmallVector<SDValue> LowerOps;
20590 if (SDValue Lo = isUpperSubvectorUndef(In, DL, DAG)) {
20592 if (SDValue Res = LowerTruncateVecPack(DstHalfVT, Lo, DL, Subtarget, DAG))
20610 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, In);
20614 return SDValue();
20617 static SDValue LowerTruncateVecI1(SDValue Op, const SDLoc &DL,
20621 SDValue In = Op.getOperand(0);
20657 SDValue Lo, Hi;
20696 SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
20699 SDValue In = Op.getOperand(0);
20714 SDValue Lo, Hi;
20728 if (SDValue SignPack =
20737 return SDValue();
20746 if (SDValue SignPack =
20779 SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
20781 SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
20820 static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl,
20829 SDValue Small = DAG.getNode(X86ISD::CVTTP2SI, dl, VT, Src);
20830 SDValue Big =
20845 SDValue Overflow = DAG.getNode(ISD::OR, dl, VT, Small, Big);
20849 SDValue IsOverflown =
20856 SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
20861 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
20862 SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();
20866 SDValue Res;
20898 SDValue Tmp = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v8f64)
20928 SDValue Tmp =
20930 SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
21002 SDValue Tmp =
21032 SDValue Tmp =
21058 return SDValue();
21060 SDValue Zero = DAG.getConstantFP(0.0, dl, MVT::v2f32);
21061 SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
21065 SDValue Chain = Tmp.getValue(1);
21072 SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
21092 return SDValue();
21110 SDValue FloatOffset = DAG.getNode(ISD::UINT_TO_FP, dl, SrcVT,
21117 SDValue Small =
21120 SDValue Big = DAG.getNode(
21131 SDValue IsOverflown = DAG.getNode(
21139 return SDValue();
21163 return SDValue();
21197 std::pair<SDValue, SDValue> Tmp =
21207 if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
21216 SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
21218 SDValue Src = Op.getOperand(0);
21223 return DstVT.getScalarType() == MVT::i32 ? Op : SDValue();
21226 return SDValue();
21235 SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
21238 SDValue Src = N->getOperand(0);
21244 return SDValue();
21248 SDValue Chain = DAG.getEntryNode();
21255 SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT);
21264 SDValue Ops[] = { Chain, StackPtr };
21272 SDValue StoreOps[] = { Chain, Src, StackPtr };
21280 SDValue
21281 X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
21287 SDLoc dl(SDValue(Node, 0));
21288 SDValue Src = Node->getOperand(0);
21301 return SDValue();
21349 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
21350 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
21357 SDValue MinClamped = DAG.getNode(
21360 SDValue BothClamped = DAG.getNode(
21363 SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
21371 SDValue MinClamped = DAG.getNode(
21374 SDValue BothClamped = DAG.getNode(
21377 SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
21386 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
21391 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
21392 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
21395 SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src);
21403 SDValue Select = FpToInt;
21425 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
21430 SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
21435 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
21436 SDValue In = Op.getOperand(IsStrict ? 1 : 0);
21443 return SDValue();
21466 return SDValue();
21483 SDValue Callee = DAG.getExternalSymbol(
21490 SDValue Res;
21502 SDValue Res;
21528 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f16, In,
21540 SDValue Res =
21548 SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
21552 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
21553 SDValue In = Op.getOperand(IsStrict ? 1 : 0);
21558 return SDValue();
21563 return SDValue();
21577 SDValue Callee = DAG.getExternalSymbol(
21585 SDValue Res;
21601 return SDValue();
21606 return SDValue();
21611 SDValue Res;
21612 SDValue Rnd = DAG.getTargetConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, DL,
21640 static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) {
21642 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
21647 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16,
21651 SDValue Chain;
21669 static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) {
21671 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
21676 SDValue Res, Chain;
21701 SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op,
21708 SDValue Res;
21718 SDValue Res =
21725 static SDValue lowerAddSubToHorizontalOp(SDValue Op, const SDLoc &DL,
21729 SDValue LHS = Op.getOperand(0);
21730 SDValue RHS = Op.getOperand(1);
21772 SDValue X = LHS.getOperand(0);
21792 SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
21799 SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
21809 static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) {
21810 SDValue N0 = Op.getOperand(0);
21821 SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT,
21831 static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
21872 SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
21874 SDValue Op0 = Op.getOperand(0);
21879 SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
21887 SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
21892 static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
21893 SDValue Mag = Op.getOperand(0);
21894 SDValue Sign = Op.getOperand(1);
21930 SDValue SignMask = DAG.getConstantFP(
21932 SDValue MagMask = DAG.getConstantFP(
21938 SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);
21943 SDValue MagBits;
21956 SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
21961 static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
21962 SDValue N0 = Op.getOperand(0);
21972 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, N0);
21980 static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL, SelectionDAG &DAG) {
21991 return SDValue();
22021 static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
22029 static bool isOrXorXorTree(SDValue X, bool Root = true) {
22041 static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG,
22043 SDValue Op0 = X.getOperand(0);
22044 SDValue Op1 = X.getOperand(1);
22046 SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT, HasPT, SToV);
22047 SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT, HasPT, SToV);
22055 SDValue A = SToV(Op0);
22056 SDValue B = SToV(Op1);
22068 static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
22079 return SDValue();
22088 return SDValue();
22091 auto IsVectorBitCastCheap = [](SDValue X) {
22098 return SDValue();
22142 auto ScalarToVector = [&](SDValue X) -> SDValue {
22146 SDValue OrigX = X.getOperand(0);
22168 SDValue Cmp;
22176 SDValue VecX = ScalarToVector(X);
22177 SDValue VecY = ScalarToVector(Y);
22195 SDValue BCCmp =
22197 SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp);
22199 SDValue X86SetCC = getSETCC(X86CC, PT, DL, DAG);
22207 SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
22208 SDValue FFFFs = DAG.getConstant(0xFFFF, DL, MVT::i32);
22212 return SDValue();
22219 static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
22220 SmallVectorImpl<SDValue> &SrcOps,
22222 SmallVector<SDValue, 8> Opnds;
22223 DenseMap<SDValue, APInt> SrcOpMap;
22234 SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
22253 SDValue Src = I->getOperand(0);
22254 DenseMap<SDValue, APInt>::iterator M = SrcOpMap.find(Src);
22275 for (SDValue &SrcOp : SrcOps)
22288 static SDValue LowerVectorAllEqual(const SDLoc &DL, SDValue LHS, SDValue RHS,
22296 return SDValue();
22301 return SDValue();
22305 return SDValue();
22312 auto MaskBits = [&](SDValue Src) {
22316 SDValue MaskValue = DAG.getConstant(Mask, DL, SrcVT);
22325 return SDValue();
22330 SDValue Lo =
22332 SDValue Hi =
22348 return SDValue();
22357 return SDValue();
22382 SDValue V = DAG.getSetCC(DL, BoolVT, LHS, RHS, ISD::SETEQ);
22395 SDValue V = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
22411 SDValue V = DAG.getSetCC(DL, BoolVT, LHS, RHS, ISD::SETNE);
22419 SDValue V = DAG.getNode(ISD::XOR, DL, TestVT, LHS, RHS);
22427 SDValue V = DAG.getNode(X86ISD::PCMPEQ, DL, MaskVT, LHS, RHS);
22436 static SDValue MatchVectorAllEqualTest(SDValue LHS, SDValue RHS,
22446 return SDValue();
22448 SDValue Op = LHS;
22450 return SDValue();
22459 SDValue Src = Op.getOperand(0);
22479 SmallVector<SDValue, 8> VecIns;
22483 [VT](SDValue V) { return VT == V.getValueType(); }) &&
22488 return SDValue();
22496 SDValue LHS = VecIns[Slot];
22497 SDValue RHS = VecIns[Slot + 1];
22511 if (SDValue Match =
22524 SDValue Src = peekThroughBitcasts(Op);
22530 SDValue LHS = Src.getOperand(0);
22531 SDValue RHS = Src.getOperand(1);
22545 SDValue Inner = Src.getOperand(0);
22559 return SDValue();
22563 static bool hasNonFlagsUse(SDValue Op) {
22584 static bool isProfitableToUseFlagOp(SDValue Op) {
22596 static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
22640 SDValue ArithOp = Op;
22679 return SDValue(Op.getNode(), 1);
22697 SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);
22699 SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
22700 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), New);
22701 return SDValue(New.getNode(), 1);
22706 static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
22765 SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(1), Op1);
22774 SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0, Op1.getOperand(1));
22780 SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
22790 SDNode *N, SDValue, SDValue IntPow2) const {
22808 bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
22826 SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
22853 SDValue Estimate = DAG.getNode(Opcode, DL, VT, Op);
22866 SDValue Zero = DAG.getIntPtrConstant(0, DL);
22867 SDValue Undef = DAG.getUNDEF(MVT::v8f16);
22875 return SDValue();
22880 SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
22901 return SDValue();
22917 SDValue Zero = DAG.getIntPtrConstant(0, DL);
22918 SDValue Undef = DAG.getUNDEF(MVT::v8f16);
22926 return SDValue();
22939 SDValue
22945 return SDValue(N,0); // Lower SDIV as SDIV
22953 return SDValue();
22960 return SDValue();
22965 return SDValue();
22972 static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
22975 SDValue Op0 = And.getOperand(0);
22976 SDValue Op1 = And.getOperand(1);
22982 SDValue Src, BitNo;
22994 return SDValue();
23002 SDValue AndLHS = Op0;
23022 return SDValue();
23031 if (SDValue BT = getBT(Src, BitNo, dl, DAG)) {
23036 return SDValue();
23046 static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
23047 SDValue &Op1, bool &IsAlwaysSignaling) {
23109 static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS,
23115 SDValue CC = DAG.getCondCode(Cond);
23118 SDValue LHS1, LHS2;
23122 SDValue RHS1, RHS2;
23133 static SDValue LowerIntVSETCC_AVX512(SDValue Op, const SDLoc &dl,
23135 SDValue Op0 = Op.getOperand(0);
23136 SDValue Op1 = Op.getOperand(1);
23137 SDValue CC = Op.getOperand(2);
23157 static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc,
23161 return SDValue();
23166 SmallVector<SDValue, 8> NewVecC;
23171 return SDValue();
23176 return SDValue();
23179 return SDValue();
23191 static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
23196 return SDValue();
23200 return SDValue();
23204 return SDValue();
23212 return SDValue();
23213 SDValue ULEOp1 =
23216 return SDValue();
23225 SDValue UGEOp1 =
23228 return SDValue();
23241 SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
23246 static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
23250 SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
23251 SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
23252 SDValue CC = Op.getOperand(IsStrict ? 3 : 2);
23262 return SDValue();
23265 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
23288 SDValue Cmp;
23300 return SDValue();
23304 SDValue SignalCmp = DAG.getNode(
23332 SDValue Cmp0, Cmp1;
23447 SDValue BC0 = peekThroughBitcasts(Op0);
23470 SDValue Result = Op0.getOperand(0);
23520 if (SDValue UGTOp1 =
23528 if (SDValue ULTOp1 =
23546 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
23558 if (SDValue V =
23587 SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
23589 SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
23598 SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
23600 SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
23612 SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
23614 SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
23622 SDValue SB = DAG.getConstant(FlipSigns ? 0x8000000080000000ULL
23634 SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
23635 SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
23640 SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
23641 SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
23642 SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
23644 SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
23663 SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
23667 SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
23681 SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
23687 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
23697 static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
23700 SDValue &X86CC) {
23705 return SDValue();
23712 return SDValue();
23721 return SDValue();
23732 SDValue LHS = Op0.getOperand(0);
23733 SDValue RHS = Op0.getOperand(1);
23739 SDValue LHS = Op0;
23740 SDValue RHS = Op0;
23752 SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
23755 SDValue &X86CC) const {
23765 if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) {
23772 if (SDValue CmpZ = MatchVectorAllEqualTest(Op0, Op1, CC, dl, Subtarget, DAG,
23779 if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
23809 SDValue Neg = DAG.getNode(X86ISD::SUB, dl, CmpVTs,
23811 return SDValue(Neg.getNode(), 1);
23822 SDValue New = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(0),
23824 DAG.ReplaceAllUsesOfValueWith(SDValue(Op0.getNode(), 0), New);
23827 return SDValue(New.getNode(), 1);
23836 SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget);
23841 SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
23850 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
23851 SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
23852 SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
23858 return SDValue();
23904 SDValue X86CC;
23905 SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
23906 SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
23913 return SDValue();
23915 SDValue EFLAGS;
23926 SDValue X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
23927 SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
23931 SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
23932 SDValue LHS = Op.getOperand(0);
23933 SDValue RHS = Op.getOperand(1);
23934 SDValue Carry = Op.getOperand(2);
23935 SDValue Cond = Op.getOperand(3);
23947 SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
23955 static std::pair<SDValue, SDValue>
23956 getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
23958 SDValue Value, Overflow;
23959 SDValue LHS = Op.getOperand(0);
23960 SDValue RHS = Op.getOperand(1);
24001 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
24008 SDValue Value, Overflow;
24011 SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
24017 static bool isX86LogicalCmp(SDValue Op) {
24031 static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
24035 SDValue VOp0 = V.getOperand(0);
24041 SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
24043 SDValue Cond = Op.getOperand(0);
24044 SDValue Op1 = Op.getOperand(1);
24045 SDValue Op2 = Op.getOperand(2);
24048 SDValue CC;
24062 SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
24069 SDValue Cmp =
24077 SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
24097 SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
24098 SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
24099 SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
24104 SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2);
24109 SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
24110 SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
24117 SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond);
24123 if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
24144 SDValue Cmp = Cond.getOperand(1);
24145 SDValue CmpOp0 = Cmp.getOperand(0);
24153 auto MatchFFSMinus1 = [&](SDValue Op1, SDValue Op2) {
24163 SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
24173 SDValue Sub;
24175 SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
24178 SDValue One = DAG.getConstant(1, DL, CmpOp0.getValueType());
24181 SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
24188 SDValue Src1, Src2;
24204 SDValue Neg;
24216 SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
24217 SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
24231 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, VT);
24232 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, Op1, ShiftAmt);
24252 SDValue Cmp = Cond.getOperand(1);
24266 SDValue Value;
24283 if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, X86CondCode)) {
24306 SDValue Res =
24320 SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
24324 SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1,
24341 SDValue Ops[] = { Op2, Op1, CC, Cond };
24342 SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops);
24348 SDValue Ops[] = { Op2, Op1, CC, Cond };
24352 static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, const SDLoc &dl,
24356 SDValue In = Op->getOperand(0);
24382 SDValue V;
24388 SDValue NegOne = DAG.getConstant(-1, dl, WideVT);
24389 SDValue Zero = DAG.getConstant(0, dl, WideVT);
24407 static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
24409 SDValue In = Op->getOperand(0);
24424 static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
24427 SDValue In = Op->getOperand(0);
24436 return SDValue();
24438 return SDValue();
24442 return SDValue();
24486 SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In);
24487 SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask);
24510 SDValue Curr = In;
24511 SDValue SignExt = Curr;
24538 SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
24539 SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
24547 static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
24550 SDValue In = Op->getOperand(0);
24586 SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);
24593 SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
24600 static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
24601 SDValue StoredVal = Store->getValue();
24612 return SDValue();
24615 SDValue Value0, Value1;
24618 SDValue Ptr0 = Store->getBasePtr();
24619 SDValue Ptr1 =
24621 SDValue Ch0 =
24625 SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
24634 static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
24636 SDValue StoredVal = Store->getValue();
24645 return SDValue();
24652 SmallVector<SDValue, 4> Stores;
24655 SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(),
24657 SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
24659 SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
24668 static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
24672 SDValue StoredVal = St->getValue();
24700 return SDValue();
24712 return SDValue();
24716 return SDValue();
24743 SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
24755 static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
24772 SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(),
24779 SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
24786 return SDValue();
24791 static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
24801 SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
24802 SDValue Chain = Op.getOperand(0);
24803 SDValue Cond = Op.getOperand(1);
24804 SDValue Dest = Op.getOperand(2);
24811 SDValue LHS = Cond.getOperand(0);
24812 SDValue RHS = Cond.getOperand(1);
24821 SDValue Value, Overflow;
24828 SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
24834 SDValue CCVal;
24835 SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, CC, SDLoc(Cond), DAG, CCVal);
24852 SDValue FalseBB = User->getOperand(1);
24859 SDValue Cmp =
24861 SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
24873 SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
24874 SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
24883 SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
24884 SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
24891 SDValue Value, Overflow;
24895 SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
24911 SDValue LHS = Cond;
24912 SDValue RHS = DAG.getConstant(0, dl, CondVT);
24914 SDValue CCVal;
24915 SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, ISD::SETNE, dl, DAG, CCVal);
24925 SDValue
24926 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
24937 SDValue Chain = Op.getOperand(0);
24938 SDValue Size = Op.getOperand(1);
24949 SDValue Result;
24967 SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
25002 SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
25014 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
25016 SDValue Ops[2] = {Result, Chain};
25020 SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
25032 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
25042 SmallVector<SDValue, 8> MemOps;
25043 SDValue FIN = Op.getOperand(1);
25045 SDValue Store = DAG.getStore(
25061 SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
25069 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
25077 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
25087 SDValue Chain = Op.getOperand(0);
25088 SDValue SrcPtr = Op.getOperand(1);
25119 SDValue InstOps[] = {Chain, SrcPtr,
25124 SDValue VAARG = DAG.getMemIntrinsicNode(
25135 static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
25145 SDValue Chain = Op.getOperand(0);
25146 SDValue DstPtr = Op.getOperand(1);
25147 SDValue SrcPtr = Op.getOperand(2);
25181 static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
25182 SDValue SrcOp, uint64_t ShiftAmt,
25223 SDValue Amt = DAG.getConstant(ShiftAmt, dl, VT);
25224 if (SDValue C = DAG.FoldConstantArithmetic(ShiftOpc, dl, VT, {SrcOp, Amt}))
25233 static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
25234 SDValue SrcOp, SDValue ShAmt, int ShAmtIdx,
25277 SmallVector<SDValue> MaskElts(
25281 SDValue Mask = DAG.getBuildVector(AmtVT, dl, MaskElts);
25306 SDValue ByteShift = DAG.getTargetConstant(
25330 static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
25345 SDValue Lo, Hi;
25364 static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
25365 SDValue PreservedSrc,
25376 SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
25390 static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
25391 SDValue PreservedSrc,
25403 SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
25439 static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
25440 SDValue EntryEBP) {
25457 SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
25458 SDValue ParentFrameOffset =
25470 SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
25475 SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
25478 auto isRoundModeCurDirection = [](SDValue Rnd) {
25484 auto isRoundModeSAE = [](SDValue Rnd) {
25498 auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) {
25530 SDValue Rnd = Op.getOperand(2);
25537 return SDValue();
25543 SDValue Sae = Op.getOperand(2);
25551 return SDValue();
25556 SDValue Src2 = Op.getOperand(2);
25563 SDValue Rnd = Op.getOperand(3);
25570 return SDValue();
25577 SDValue Sae = Op.getOperand(3);
25585 return SDValue();
25592 SDValue Src1 = Op.getOperand(1);
25593 SDValue Src2 = Op.getOperand(2);
25594 SDValue Src3 = Op.getOperand(3);
25606 SDValue Rnd = Op.getOperand(4);
25613 return SDValue();
25621 SDValue Src4 = Op.getOperand(4);
25631 SDValue Src = Op.getOperand(1);
25632 SDValue PassThru = Op.getOperand(2);
25633 SDValue Mask = Op.getOperand(3);
25639 SDValue Rnd = Op.getOperand(4);
25647 return SDValue();
25654 SDValue Src = Op.getOperand(1);
25655 SDValue PassThru = Op.getOperand(2);
25656 SDValue Mask = Op.getOperand(3);
25657 SDValue Rnd = Op.getOperand(4);
25665 return SDValue();
25671 SDValue Src1 = Op.getOperand(1);
25672 SDValue Src2 = Op.getOperand(2);
25673 SDValue passThru = Op.getOperand(3);
25674 SDValue Mask = Op.getOperand(4);
25682 SDValue Rnd = Op.getOperand(5);
25690 return SDValue();
25699 SDValue RoundingMode = Op.getOperand(5);
25702 SDValue Sae = Op.getOperand(6);
25706 return SDValue();
25713 SDValue Src1 = Op.getOperand(1);
25714 SDValue Src2 = Op.getOperand(2);
25715 SDValue passThru = Op.getOperand(3);
25716 SDValue Mask = Op.getOperand(4);
25717 SDValue Rnd = Op.getOperand(5);
25719 SDValue NewOp;
25727 return SDValue();
25732 SDValue Src1 = Op.getOperand(1);
25733 SDValue Src2 = Op.getOperand(2);
25734 SDValue passThru = Op.getOperand(3);
25735 SDValue Mask = Op.getOperand(4);
25736 SDValue Sae = Op.getOperand(5);
25743 return SDValue();
25749 SDValue Src1 = Op.getOperand(1);
25750 SDValue Src2 = Op.getOperand(2);
25751 SDValue PassThru = Op.getOperand(3);
25752 SDValue Mask = Op.getOperand(4);
25753 SDValue NewOp;
25755 SDValue Rnd = Op.getOperand(5);
25761 return SDValue();
25768 SDValue Src1 = Op.getOperand(1);
25769 SDValue Src2 = Op.getOperand(2);
25770 SDValue PassThru = Op.getOperand(3);
25771 SDValue Mask = Op.getOperand(4);
25775 SDValue Sae = Op.getOperand(5);
25779 return SDValue();
25786 SDValue Src1 = Op.getOperand(1);
25787 SDValue Src2 = Op.getOperand(2);
25788 SDValue Src3 = Op.getOperand(3);
25789 SDValue PassThru = Op.getOperand(4);
25790 SDValue Mask = Op.getOperand(5);
25791 SDValue Sae = Op.getOperand(6);
25798 return SDValue();
25804 SDValue Src1 = Op.getOperand(1);
25805 SDValue Src2 = Op.getOperand(2);
25806 SDValue Src3 = Op.getOperand(3);
25807 SDValue PassThru = Op.getOperand(4);
25808 SDValue Mask = Op.getOperand(5);
25812 SDValue Sae = Op.getOperand(6);
25816 return SDValue();
25822 SDValue Src1 = Op.getOperand(1);
25823 SDValue Src2 = Op.getOperand(2);
25824 SDValue Src3 = Op.getOperand(3);
25833 SDValue Src1 = Op.getOperand(1);
25834 SDValue Src2 = Op.getOperand(2);
25841 SDValue Src1 = Op.getOperand(1);
25842 SDValue Src2 = Op.getOperand(2);
25843 SDValue Src3 = Op.getOperand(3);
25844 SDValue Mask = Op.getOperand(4);
25847 SDValue PassThru = Src3;
25854 SDValue NewOp;
25856 SDValue Rnd = Op.getOperand(5);
25862 return SDValue();
25874 SDValue Src1 = Op.getOperand(1);
25875 SDValue Imm = Op.getOperand(2);
25876 SDValue Mask = Op.getOperand(3);
25877 SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm);
25878 SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(),
25882 SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
25890 SDValue CC = Op.getOperand(3);
25891 SDValue Mask = Op.getOperand(4);
25896 SDValue Sae = Op.getOperand(5);
25901 return SDValue();
25908 SDValue Src1 = Op.getOperand(1);
25909 SDValue Src2 = Op.getOperand(2);
25910 SDValue CC = Op.getOperand(3);
25911 SDValue Mask = Op.getOperand(4);
25913 SDValue Cmp;
25915 SDValue Sae = Op.getOperand(5);
25919 return SDValue();
25925 SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(),
25929 SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
25936 SDValue LHS = Op.getOperand(1);
25937 SDValue RHS = Op.getOperand(2);
25942 SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
25943 SDValue SetCC;
25947 SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
25953 SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
25972 SDValue LHS = Op.getOperand(1);
25973 SDValue RHS = Op.getOperand(2);
25975 SDValue Sae = Op.getOperand(4);
25977 SDValue FCmp;
25985 return SDValue();
25988 SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
25995 SDValue SrcOp = Op.getOperand(1);
25996 SDValue ShAmt = Op.getOperand(2);
26011 SDValue Mask = Op.getOperand(3);
26012 SDValue DataToCompress = Op.getOperand(1);
26013 SDValue PassThru = Op.getOperand(2);
26026 SDValue Src1 = Op.getOperand(1);
26027 SDValue Src2 = Op.getOperand(2);
26028 SDValue Src3 = Op.getOperand(3);
26029 SDValue Imm = Op.getOperand(4);
26030 SDValue Mask = Op.getOperand(5);
26031 SDValue Passthru = (IntrData->Type == FIXUPIMM)
26037 SDValue Sae = Op.getOperand(6);
26041 return SDValue();
26044 SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm);
26056 SDValue RoundingMode = DAG.getTargetConstant(Round & 0xf, dl, MVT::i32);
26065 SDValue RoundingMode = DAG.getTargetConstant(Round & 0xf, dl, MVT::i32);
26073 SDValue Control = DAG.getTargetConstant(Imm & 0xffff, dl,
26083 SDValue Res;
26090 SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1),
26095 SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
26096 SDValue Results[] = { SetCC, Res };
26103 SDValue Src = Op.getOperand(1);
26104 SDValue PassThru = Op.getOperand(2);
26105 SDValue Mask = Op.getOperand(3);
26117 SDValue Src = Op.getOperand(1);
26118 SDValue Rnd = Op.getOperand(2);
26119 SDValue PassThru = Op.getOperand(3);
26120 SDValue Mask = Op.getOperand(4);
26144 SDValue Src = Op.getOperand(1);
26145 SDValue PassThru = Op.getOperand(2);
26146 SDValue Mask = Op.getOperand(3);
26164 default: return SDValue(); // Don't custom lower most intrinsics.
26249 SDValue LHS = Op.getOperand(1);
26250 SDValue RHS = Op.getOperand(2);
26251 SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
26252 SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
26311 SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
26313 SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2);
26314 SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG);
26326 SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
26339 SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
26358 SDValue Op1 = Op.getOperand(1);
26365 SDValue Result = DAG.getMCSymbol(LSDASym, VT);
26370 SDValue FnOp = Op.getOperand(1);
26371 SDValue IncomingFPOp = Op.getOperand(2);
26408 SDValue Operation =
26412 SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL,
26414 SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL,
26427 SDValue ShAmt = Op.getOperand(2);
26495 static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
26496 SDValue Src, SDValue Mask, SDValue Base,
26497 SDValue Index, SDValue ScaleOp, SDValue Chain,
26503 return SDValue();
26505 SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
26520 SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
26521 SDValue Res =
26527 static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
26528 SDValue Src, SDValue Mask, SDValue Base,
26529 SDValue Index, SDValue ScaleOp, SDValue Chain,
26536 return SDValue();
26538 SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
26558 SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
26559 SDValue Res =
26565 static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
26566 SDValue Src, SDValue Mask, SDValue Base,
26567 SDValue Index, SDValue ScaleOp, SDValue Chain,
26573 return SDValue();
26575 SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
26589 SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
26590 SDValue Res =
26596 static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
26597 SDValue Mask, SDValue Base, SDValue Index,
26598 SDValue ScaleOp, SDValue Chain,
26604 return SDValue();
26606 SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
26608 SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
26609 SDValue Segment = DAG.getRegister(0, MVT::i32);
26612 SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
26613 SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
26615 return SDValue(Res, 0);
26626 static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
26631 SmallVectorImpl<SDValue> &Results) {
26632 SDValue Chain = N->getOperand(0);
26633 SDValue Glue;
26642 SDValue N1Ops[] = {Chain, Glue};
26644 TargetOpcode, DL, Tys, ArrayRef<SDValue>(N1Ops, Glue.getNode() ? 2 : 1));
26645 Chain = SDValue(N1, 0);
26648 SDValue LO, HI;
26650 LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
26654 LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1));
26663 SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
26671 SDValue Ops[] = { LO, HI };
26672 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
26684 SmallVectorImpl<SDValue> &Results) {
26688 SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
26694 SDValue Chain = Results[1];
26697 SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, Glue);
26702 static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
26704 SmallVector<SDValue, 3> Results;
26711 static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) {
26713 SDValue Chain = Op.getOperand(0);
26714 SDValue RegNode = Op.getOperand(2);
26729 static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
26731 SDValue Chain = Op.getOperand(0);
26732 SDValue EHGuard = Op.getOperand(2);
26748 static SDValue
26749 EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL, SDValue Val,
26750 SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
26753 SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
26754 SDValue Ops[] = { Chain, Val, Ptr, Undef };
26760 static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain,
26762 SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
26765 SDValue Ops[] = { Chain, Val, Ptr, Mask };
26779 static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
26793 SDValue Chain = Op->getOperand(0);
26794 SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64);
26795 SDValue Result =
26796 SDValue(DAG.getMachineNode(X86::SUB64ri32, dl, MVT::i64, CopyRBP,
26809 SDValue Result =
26861 SDValue Chain = Op->getOperand(0);
26879 SDValue Operation =
26882 SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
26889 SDValue Chain = Op.getOperand(0);
26901 SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op.getOperand(2),
26903 SDValue SetCC = getSETCC(X86::COND_E, Operation.getValue(0), dl, DAG);
26913 SDValue Chain = Op.getOperand(0);
26935 SDValue Operation = DAG.getMemIntrinsicNode(
26938 SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG);
26951 SDValue Chain = Op.getOperand(0);
26973 SDValue Operation = DAG.getMemIntrinsicNode(
26979 SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG);
26990 SDValue Chain = Op.getOperand(0);
26992 SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain);
26993 SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
27002 SDValue Chain = Op.getOperand(0);
27003 SDValue Op1 = Op.getOperand(2);
27004 SDValue Op2 = Op.getOperand(3);
27009 SDValue Res =
27021 SDValue Chain = Op.getOperand(0);
27022 SDValue Op1 = Op.getOperand(2);
27023 SDValue Op2 = Op.getOperand(3);
27027 SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32);
27029 SDValue Res =
27043 SDValue Chain = Op.getOperand(0);
27044 SDValue Addr = Op.getOperand(2);
27045 SDValue Src1 = Op.getOperand(3);
27046 SDValue Src2 = Op.getOperand(4);
27047 SDValue CC = Op.getOperand(5);
27049 SDValue Operation = DAG.getMemIntrinsicNode(
27063 SDValue Chain = Op.getOperand(0);
27064 SDValue Op1 = Op.getOperand(2);
27065 SDValue Op2 = Op.getOperand(3);
27098 SDValue Chain = Op.getOperand(0);
27099 SDValue Op1 = Op.getOperand(2);
27100 SDValue Op2 = Op.getOperand(3);
27124 SDValue LockArith =
27131 return SDValue();
27141 SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
27145 SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
27148 SDValue(Result.getNode(), 1)};
27149 SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops);
27153 SDValue(Result.getNode(), 2));
27156 SDValue Chain = Op.getOperand(0);
27157 SDValue Src = Op.getOperand(2);
27158 SDValue Base = Op.getOperand(3);
27159 SDValue Index = Op.getOperand(4);
27160 SDValue Mask = Op.getOperand(5);
27161 SDValue Scale = Op.getOperand(6);
27167 SDValue Chain = Op.getOperand(0);
27168 SDValue Src = Op.getOperand(2);
27169 SDValue Base = Op.getOperand(3);
27170 SDValue Index = Op.getOperand(4);
27171 SDValue Mask = Op.getOperand(5);
27172 SDValue Scale = Op.getOperand(6);
27178 SDValue Chain = Op.getOperand(0);
27179 SDValue Base = Op.getOperand(2);
27180 SDValue Mask = Op.getOperand(3);
27181 SDValue Index = Op.getOperand(4);
27182 SDValue Src = Op.getOperand(5);
27183 SDValue Scale = Op.getOperand(6);
27192 SDValue Chain = Op.getOperand(0);
27193 SDValue Mask = Op.getOperand(2);
27194 SDValue Index = Op.getOperand(3);
27195 SDValue Base = Op.getOperand(4);
27196 SDValue Scale = Op.getOperand(5);
27202 SmallVector<SDValue, 2> Results;
27213 SmallVector<SDValue, 2> Results;
27226 SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
27228 SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
27229 SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
27231 Ret, SDValue(InTrans.getNode(), 1));
27236 SDValue Mask = Op.getOperand(4);
27237 SDValue DataToTruncate = Op.getOperand(3);
27238 SDValue Addr = Op.getOperand(2);
27239 SDValue Chain = Op.getOperand(0);
27254 SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
27255 SDValue Offset = DAG.getUNDEF(VMask.getValueType());
27269 SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
27281 SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
27287 return SDValue();
27294 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
27296 SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT);
27303 SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
27308 SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
27314 SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
27345 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
27387 SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
27413 SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
27414 SDValue Chain = Op.getOperand(0);
27415 SDValue Offset = Op.getOperand(1);
27416 SDValue Handler = Op.getOperand(2);
27425 SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
27428 SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
27439 SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
27457 SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
27464 SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
27471 static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
27475 SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
27477 SDValue Root = Op.getOperand(0);
27478 SDValue Trmp = Op.getOperand(1); // trampoline
27479 SDValue FPtr = Op.getOperand(2); // nested function
27480 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
27487 SDValue OutChains[6];
27500 SDValue Addr = Trmp;
27585 SDValue OutChains[4];
27586 SDValue Addr, Disp;
27620 SDValue X86TargetLowering::LowerGET_ROUNDING(SDValue Op,
27650 SDValue StackSlot =
27655 SDValue Chain = Op.getOperand(0);
27656 SDValue Ops[] = {Chain, StackSlot};
27662 SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI, Align(2));
27666 SDValue Shift =
27673 SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
27674 SDValue RetVal =
27684 SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
27688 SDValue Chain = Op.getNode()->getOperand(0);
27693 SDValue StackSlot =
27700 SDValue Ops[] = {Chain, StackSlot};
27705 SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI);
27711 SDValue NewRM = Op.getNode()->getOperand(1);
27712 SDValue RMBits;
27740 SDValue ShiftValue =
27746 SDValue Shifted =
27758 SDValue OpsLD[] = {Chain, StackSlot};
27774 SDValue CWD = DAG.getLoad(MVT::i32, DL, Chain, StackSlot, MPI);
27802 SDValue X86TargetLowering::LowerGET_FPENV_MEM(SDValue Op,
27806 SDValue Chain = Op->getOperand(0);
27807 SDValue Ptr = Op->getOperand(1);
27833 SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
27845 static SDValue createSetFPEnvNodes(SDValue Ptr, SDValue Chain, const SDLoc &DL,
27858 SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
27869 SDValue X86TargetLowering::LowerSET_FPENV_MEM(SDValue Op,
27872 SDValue Chain = Op->getOperand(0);
27873 SDValue Ptr = Op->getOperand(1);
27881 SDValue X86TargetLowering::LowerRESET_FPENV(SDValue Op,
27885 SDValue Chain = Op.getNode()->getOperand(0);
27905 SDValue Env = DAG.getConstantPool(FPEnvBits, PtrVT);
27920 static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
27942 SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
27943 SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
27944 SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);
27950 static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
27964 SmallVector<SDValue, 64> LUTVec;
27967 SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec);
27974 SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
27975 SDValue Zero = DAG.getConstant(0, DL, CurrVT);
27977 SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
27978 SDValue Lo = Op0;
27979 SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
27980 SDValue HiZ;
27992 SDValue Res = DAG.getNode(ISD::ADD, DL, CurrVT, Lo, Hi);
28004 SDValue Shift = DAG.getConstant(CurrScalarSizeInBits, DL, NextVT);
28021 SDValue ResNext = Res = DAG.getBitcast(NextVT, Res);
28022 SDValue R0 = DAG.getNode(ISD::SRL, DL, NextVT, ResNext, Shift);
28023 SDValue R1 = DAG.getNode(ISD::SRL, DL, NextVT, HiZ, Shift);
28032 static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
28054 static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
28078 SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),
28093 static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
28097 SDValue N0 = Op.getOperand(0);
28112 SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT),
28118 static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
28135 static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
28138 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
28164 SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
28165 SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
28166 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
28167 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
28173 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
28174 SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
28187 SDValue Zero = DAG.getConstant(0, DL, VT);
28188 SDValue Result =
28191 SDValue SumDiff = Result.getValue(0);
28192 SDValue Overflow = Result.getValue(1);
28193 SDValue SatMin = DAG.getConstant(MinVal, DL, VT);
28194 SDValue SatMax = DAG.getConstant(MaxVal, DL, VT);
28195 SDValue SumNeg =
28202 return SDValue();
28205 static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
28213 SDValue N0 = Op.getOperand(0);
28214 SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
28216 SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_NS, DL, MVT::i8),
28217 SDValue(Neg.getNode(), 1)};
28223 SDValue Src = Op.getOperand(0);
28224 SDValue Neg = DAG.getNegative(Src, DL, VT);
28238 return SDValue();
28241 static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget,
28254 return SDValue();
28257 static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
28270 return SDValue();
28273 static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
28279 SDValue X = Op.getOperand(0);
28280 SDValue Y = Op.getOperand(1);
28314 auto MatchesZero = [](SDValue Op, APInt Zero) {
28322 for (const SDValue &OpVal : Op->op_values()) {
28344 SDValue NewX, NewY;
28360 SDValue VX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorType, X);
28364 SDValue Imm = DAG.getTargetConstant(MinMaxOp == X86ISD::FMAX ? 0b11 : 0b101,
28366 SDValue IsNanZero = DAG.getNode(X86ISD::VFPCLASSS, DL, MVT::v1i1, VX, Imm);
28367 SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
28370 SDValue NeedSwap = DAG.getBitcast(MVT::i8, Ins);
28375 SDValue IsXSigned;
28377 SDValue XInt = DAG.getNode(ISD::BITCAST, DL, IVT, X);
28378 SDValue ZeroCst = DAG.getConstant(0, DL, IVT);
28382 SDValue Ins = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2f64,
28385 SDValue VX = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, Ins);
28386 SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VX,
28389 SDValue ZeroCst = DAG.getConstant(0, DL, MVT::i32);
28412 SDValue MinMax = DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
28417 SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NewX, NewX, ISD::SETUO);
28421 static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
28444 SDValue LHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(0));
28445 SDValue RHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(1));
28446 SDValue Diff = DAG.getNode(ISD::SUB, dl, WideVT, LHS, RHS);
28447 SDValue AbsDiff = DAG.getNode(ISD::ABS, dl, WideVT, Diff);
28453 return SDValue();
28456 static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
28468 SDValue A = Op.getOperand(0);
28469 SDValue B = Op.getOperand(1);
28505 SDValue Mask = DAG.getBitcast(VT, DAG.getConstant(0x00FF, dl, ExVT));
28506 SDValue BLo = DAG.getNode(ISD::AND, dl, VT, Mask, B);
28507 SDValue BHi = DAG.getNode(X86ISD::ANDNP, dl, VT, Mask, B);
28508 SDValue RLo = DAG.getNode(X86ISD::VPMADDUBSW, dl, ExVT, A, BLo);
28509 SDValue RHi = DAG.getNode(X86ISD::VPMADDUBSW, dl, ExVT, A, BHi);
28521 SDValue Undef = DAG.getUNDEF(VT);
28522 SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
28523 SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));
28525 SDValue BLo, BHi;
28528 SmallVector<SDValue, 16> LoOps, HiOps;
28546 SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
28547 SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
28558 SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
28559 SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
28562 SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64,
28566 SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64,
28603 SDValue Zero = DAG.getConstant(0, dl, VT);
28606 SDValue AloBlo = Zero;
28610 SDValue AloBhi = Zero;
28612 SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
28616 SDValue AhiBlo = Zero;
28618 SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
28622 SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo);
28628 static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
28632 SDValue *Low = nullptr) {
28648 SDValue Zero = DAG.getConstant(0, dl, VT);
28650 SDValue ALo, AHi;
28659 SDValue BLo, BHi;
28662 SmallVector<SDValue, 16> LoOps, HiOps;
28665 SDValue LoOp = B.getOperand(i + j);
28666 SDValue HiOp = B.getOperand(i + j + 8);
28698 SDValue RLo = DAG.getNode(MulOpc, dl, ExVT, ALo, BLo);
28699 SDValue RHi = DAG.getNode(MulOpc, dl, ExVT, AHi, BHi);
28707 static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
28713 SDValue A = Op.getOperand(0);
28714 SDValue B = Op.getOperand(1);
28743 SDValue Odd0 =
28746 SDValue Odd1 =
28756 SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
28761 SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
28770 SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);
28775 SDValue Zero = DAG.getConstant(0, dl, VT);
28776 SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
28778 SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
28781 SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
28803 SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
28804 SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
28805 SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
28814 static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget,
28824 SDValue A = Op.getOperand(0);
28825 SDValue B = Op.getOperand(1);
28831 SDValue LHSLo, LHSHi;
28835 SDValue RHSLo, RHSHi;
28844 SDValue Lo = DAG.getNode(Op.getOpcode(), dl, LoVTs, LHSLo, RHSLo);
28845 SDValue Hi = DAG.getNode(Op.getOpcode(), dl, HiVTs, LHSHi, RHSHi);
28848 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
28849 SDValue Ovf = DAG.getNode(ISD::CONCAT_VECTORS, dl, OvfVT, Lo.getValue(1),
28864 SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
28865 SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
28866 SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
28868 SDValue Low = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
28870 SDValue Ovf;
28872 SDValue High, LowSign;
28899 SDValue High =
28924 SDValue Low;
28925 SDValue High =
28928 SDValue Ovf;
28931 SDValue LowSign =
28945 SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
28952 SmallVector<SDValue> Result;
28970 SDValue InChain = DAG.getEntryNode();
28978 SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
28992 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
29006 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
29010 SDValue X86TargetLowering::LowerWin64_FP_TO_INT128(SDValue Op,
29012 SDValue &Chain) const {
29017 SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
29035 SDValue Result;
29044 SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
29050 SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
29066 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
29069 SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
29075 SDValue Result;
29105 SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL, MVT VT,
29110 SmallVector<SDValue> MaskBits;
29183 static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
29187 SDValue R = Op.getOperand(0);
29188 SDValue Amt = Op.getOperand(1);
29195 SDValue Ex = DAG.getBitcast(ExVT, R);
29206 SDValue Upper =
29208 SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
29217 SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
29219 SDValue Lower =
29234 return SDValue();
29269 SDValue Mask = DAG.getAllOnesConstant(dl, VT);
29292 SDValue Zeros = DAG.getConstant(0, dl, VT);
29295 SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
29303 return SDValue();
29306 SDValue Mask = getGFNICtrlMask(Op.getOpcode(), DAG, dl, VT, ShiftAmt);
29313 SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
29322 SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
29331 SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
29333 SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
29341 return SDValue();
29344 static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
29348 SDValue R = Op.getOperand(0);
29349 SDValue Amt = Op.getOperand(1);
29354 if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) {
29372 SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
29382 SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
29391 SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
29404 return SDValue();
29408 static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
29418 return SDValue();
29428 SmallVector<SDValue> Elts(NumElems, DAG.getUNDEF(SVT));
29450 SDValue Z = DAG.getConstant(0, dl, VT);
29451 SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
29452 SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
29460 return SDValue();
29463 static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
29467 SDValue R = Op.getOperand(0);
29468 SDValue Amt = Op.getOperand(1);
29479 if (SDValue V = LowerShiftByScalarImmediate(Op, DAG, Subtarget))
29482 if (SDValue V = LowerShiftByScalarVariable(Op, DAG, Subtarget))
29494 SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
29495 SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
29518 SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
29519 SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
29520 SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
29521 SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
29537 SDValue Amt1, Amt2;
29541 SDValue A = Amt->getOperand(i);
29569 SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
29571 SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
29583 if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
29590 SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
29591 SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
29592 if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
29593 SDValue Zero = DAG.getConstant(0, dl, VT);
29594 SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ);
29595 SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale);
29609 SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
29610 SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
29611 if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
29612 SDValue Amt0 =
29614 SDValue Amt1 =
29616 SDValue Sra1 =
29618 SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale);
29630 SDValue Amt0, Amt1, Amt2, Amt3;
29642 SDValue Z = DAG.getConstant(0, dl, VT);
29648 SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt);
29649 SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
29651 SDValue Msk02 = getV4X86ShuffleImm8ForMask({0, 1, 1, 1}, dl, DAG);
29652 SDValue Msk13 = getV4X86ShuffleImm8ForMask({2, 3, 3, 3}, dl, DAG);
29661 SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0));
29662 SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1));
29663 SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2));
29664 SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3));
29669 SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
29670 SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
29673 SDValue R01 = DAG.getVectorShuffle(VT, dl, R0, R1, {0, -1, -1, 5});
29674 SDValue R23 = DAG.getVectorShuffle(VT, dl, R2, R3, {2, -1, -1, 7});
29705 SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8);
29724 SmallVector<SDValue, 16> LoAmt, HiAmt;
29733 SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt);
29734 SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt);
29736 SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R));
29737 SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R));
29752 auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
29776 SDValue Z = DAG.getConstant(0, dl, SelVT);
29777 SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
29790 SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));
29813 SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
29814 SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
29815 SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R);
29816 SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R);
29823 SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG);
29824 SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG);
29858 SDValue Z = DAG.getConstant(0, dl, VT);
29859 SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z);
29860 SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z);
29861 SDValue RLo = getUnpackl(DAG, dl, VT, Z, R);
29862 SDValue RHi = getUnpackh(DAG, dl, VT, Z, R);
29867 SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo);
29868 SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi);
29880 auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
29894 SDValue C =
29912 SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG);
29945 return SDValue();
29948 static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
29955 SDValue Op0 = Op.getOperand(0);
29956 SDValue Op1 = Op.getOperand(1);
29957 SDValue Amt = Op.getOperand(2);
29972 SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
30005 SDValue ShX =
30008 SDValue ShY =
30018 SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, Op0,
30020 SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Op1,
30025 SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
30026 SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
30031 return SDValue();
30051 if (SDValue ScalarAmt = DAG.getSplatSourceVector(AmtMod, ScalarAmtIdx)) {
30054 return SDValue();
30056 SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
30057 SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
30072 return SDValue();
30084 SDValue Res = DAG.getNode(ISD::OR, DL, WideVT, Op0, Op1);
30095 SDValue Z = DAG.getConstant(0, DL, VT);
30096 SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
30097 SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
30098 SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));
30099 SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));
30100 SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo);
30101 SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi);
30106 return SDValue();
30120 SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
30121 SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
30125 SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift);
30137 return SDValue();
30150 static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
30156 SDValue R = Op.getOperand(0);
30157 SDValue Amt = Op.getOperand(1);
30193 SDValue Z = DAG.getConstant(0, DL, VT);
30198 if (SDValue NegAmt = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {Z, Amt}))
30211 SDValue Mask = getGFNICtrlMask(Opcode, DAG, DL, VT, RotAmt);
30245 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, R,
30247 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, R,
30266 SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
30267 SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
30274 if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) {
30280 SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
30281 SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
30300 SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
30301 SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
30302 SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));
30303 SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));
30304 SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo);
30305 SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi);
30323 return SDValue();
30337 auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
30350 SDValue Z = DAG.getConstant(0, DL, SelVT);
30351 SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
30372 SDValue M;
30408 SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
30410 SDValue SHL = DAG.getNode(IsROTL ? ISD::SHL : ISD::SRL, DL, VT, R, Amt);
30411 SDValue SRL = DAG.getNode(IsROTL ? ISD::SRL : ISD::SHL, DL, VT, R, AmtR);
30428 SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG);
30430 return SDValue();
30434 SDValue Lo = DAG.getNode(ISD::MUL, DL, VT, R, Scale);
30435 SDValue Hi = DAG.getNode(ISD::MULHU, DL, VT, R, Scale);
30444 SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask);
30445 SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask);
30447 SDValue Res02 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
30450 SDValue Res13 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
30989 static SDValue emitLockedStackOp(SelectionDAG &DAG,
30990 const X86Subtarget &Subtarget, SDValue Chain,
31021 SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
31022 SDValue Ops[] = {
31032 return SDValue(Res, 1);
31035 SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
31036 SDValue Ops[] = {
31047 return SDValue(Res, 1);
31050 static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
31065 SDValue Chain = Op.getOperand(0);
31073 static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
31089 SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
31090 Op.getOperand(2), SDValue());
31091 SDValue Ops[] = { cpIn.getValue(0),
31098 SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
31101 SDValue cpOut =
31103 SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
31105 SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
31112 static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG,
31117 SDValue Lo, Hi;
31128 SDValue Lo, Hi;
31140 static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
31142 SDValue Src = Op.getOperand(0);
31152 SDValue Lo, Hi;
31164 SDValue V = DAG.getSExtOrTrunc(Src, DL, SExtVT);
31176 return SDValue();
31208 static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
31224 SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
31236 SDValue Zeros = DAG.getConstant(0, DL, VT);
31237 SDValue V32 = DAG.getBitcast(VT, V);
31238 SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
31239 SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);
31265 SDValue ShifterV = DAG.getConstant(8, DL, VT);
31266 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
31272 static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
31296 SmallVector<SDValue, 64> LUTVec;
31299 SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec);
31300 SDValue M0F = DAG.getConstant(0x0F, DL, VT);
31303 SDValue FourV = DAG.getConstant(4, DL, VT);
31304 SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV);
31307 SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F);
31312 SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles);
31313 SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles);
31319 static SDValue LowerVectorCTPOP(SDValue Op, const SDLoc &DL,
31325 SDValue Op0 = Op.getOperand(0);
31351 SDValue ByteOp = DAG.getBitcast(ByteVT, Op0);
31352 SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp);
31358 return SDValue();
31363 static SDValue LowerCTPOP(SDValue N, const X86Subtarget &Subtarget,
31366 SDValue Op = N.getOperand(0);
31410 SDValue LUT = DAG.getConstant(0x4332322132212110ULL, DL, MVT::i64);
31426 SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);
31442 return SDValue(); // fallback to generic expansion.
31450 static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
31452 SDValue In = Op.getOperand(0);
31459 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
31479 SmallVector<SDValue, 16> MaskElts;
31488 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, MaskElts);
31489 SDValue Res = DAG.getBitcast(MVT::v16i8, In);
31495 static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
31504 SDValue In = Op.getOperand(0);
31521 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
31534 SDValue Res = DAG.getNode(ISD::BSWAP, DL, VT, In);
31546 SDValue Matrix = getGFNICtrlMask(ISD::BITREVERSE, DAG, DL, VT);
31554 SDValue NibbleMask = DAG.getConstant(0xF, DL, VT);
31555 SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask);
31556 SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT));
31569 SmallVector<SDValue, 16> LoMaskElts, HiMaskElts;
31575 SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts);
31576 SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts);
31582 static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget,
31585 SDValue X = Op.getOperand(0);
31592 SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X,
31595 SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
31602 return SDValue();
31606 SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
31609 SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
31615 SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X,
31625 SDValue Hi = DAG.getNode(
31628 SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
31630 SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);
31633 SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
31638 static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
31670 static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
31673 SDValue Chain = N->getOperand(0);
31674 SDValue LHS = N->getOperand(1);
31675 SDValue RHS = N->getOperand(2);
31716 SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
31723 SDValue NewChain = DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Chain);
31730 SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
31738 static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
31756 SDValue Chain;
31760 SDValue VecVal = DAG.getBitcast(MVT::v2i64, Node->getVal());
31769 SDValue SclToVec =
31774 SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};
31780 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
31787 SDValue LdOps[] = {Chain, StackPtr};
31788 SDValue Value = DAG.getMemIntrinsicNode(
31794 SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};
31814 SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, Node->getMemoryVT(),
31820 static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
31827 return SDValue();
31833 SDValue Carry = Op.getOperand(2);
31839 SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
31844 SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
31852 static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
31860 SDValue Arg = Op.getOperand(0);
31880 SDValue Callee =
31891 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
31898 SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
31900 SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
31908 static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
31929 SDValue N1 = InOp.getOperand(1);
31939 SmallVector<SDValue, 16> Ops;
31945 SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
31951 SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) :
31957 static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
31963 SDValue Src = N->getValue();
31968 SDValue Scale = N->getScale();
31969 SDValue Index = N->getIndex();
31970 SDValue Mask = N->getMask();
31971 SDValue Chain = N->getChain();
31972 SDValue BasePtr = N->getBasePtr();
31982 SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
31986 return SDValue();
31994 return SDValue();
32015 SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
32020 static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
32026 SDValue Mask = N->getMask();
32028 SDValue PassThru = N->getPassThru();
32037 SDValue NewLoad = DAG.getMaskedLoad(
32043 SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
32074 SDValue NewLoad = DAG.getMaskedLoad(
32079 SDValue Extract =
32082 SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
32086 static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
32089 SDValue DataToStore = N->getValue();
32092 SDValue Mask = N->getMask();
32128 static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
32136 SDValue Index = N->getIndex();
32137 SDValue Mask = N->getMask();
32138 SDValue PassThru = N->getPassThru();
32145 return SDValue();
32171 SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
32173 SDValue NewGather = DAG.getMemIntrinsicNode(
32176 SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT,
32181 static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
32183 SDValue Src = Op.getOperand(0);
32204 SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op,
32212 SmallVector<SDValue, 2> Ops;
32218 return SDValue(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
32222 static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) {
32225 SDValue Lo, Hi;
32229 SDValue RC = Op.getOperand(1);
32235 static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
32301 static SDValue getFlagsOfCmpZeroFori1(SelectionDAG &DAG, const SDLoc &DL,
32302 SDValue Mask) {
32309 return SDValue(CmpZero.getNode(), 1);
32312 SDValue X86TargetLowering::visitMaskedLoad(
32313 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, MachineMemOperand *MMO,
32314 SDValue &NewLoad, SDValue Ptr, SDValue PassThru, SDValue Mask) const {
32327 SDValue Ops[] = {Chain, Ptr, ScalarPassThru, COND_NE, Flags};
32332 SDValue X86TargetLowering::visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL,
32333 SDValue Chain,
32334 MachineMemOperand *MMO, SDValue Ptr,
32335 SDValue Val, SDValue Mask) const {
32345 SDValue Ops[] = {Chain, ScalarVal, Ptr, COND_NE, Flags};
32350 SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
32511 SmallVectorImpl<SDValue>&Results,
32523 SDValue Lo, Hi;
32529 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
32535 SDValue Lo, Hi;
32543 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
32545 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
32551 Results.push_back(LowerCVTPS2PH(SDValue(N, 0), DAG));
32561 SDValue Op = DAG.getNode(ISD::SRL, dl, MVT::i64, N->getOperand(0),
32574 SDValue Wide =
32594 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
32595 SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
32596 SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
32599 SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
32612 SDValue Op0 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(0));
32613 SDValue Op1 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(1));
32614 SDValue Res = DAG.getNode(ISD::MUL, dl, MVT::v2i64, Op0, Op1);
32617 SDValue Hi = DAG.getBitcast(MVT::v4i32, Res);
32625 SDValue HiCmp;
32633 SDValue Ovf = DAG.getSetCC(dl, N->getValueType(1), Hi, HiCmp, ISD::SETNE);
32661 SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
32663 SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
32665 SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
32667 SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
32678 SDValue UNDEF = DAG.getUNDEF(VT);
32679 SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
32681 SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
32700 SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT));
32703 SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0);
32704 SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT);
32705 SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1);
32711 SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
32724 SDValue In = N->getOperand(0);
32734 if (SDValue Src =
32736 if (SDValue Res = truncateVectorWithPACK(PackOpcode, VT, Src,
32752 SDValue WidenIn = widenSubVector(In, false, Subtarget, DAG, dl, 128);
32784 SDValue Lo, Hi;
32789 SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi,
32803 SDValue WidenIn = widenSubVector(In, false, Subtarget, DAG, dl,
32820 SDValue In = N->getOperand(0);
32834 SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
32835 SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT);
32839 SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
32842 SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
32846 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
32872 SDValue Lo = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, LoVT, In, DAG);
32881 SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
32884 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
32897 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
32898 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
32901 SDValue Res;
32927 SDValue Tmp =
32929 SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
32953 SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(TmpVT));
32973 SDValue Res;
32974 SDValue Chain;
33003 SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
33021 SDValue Res =
33046 SDValue Res;
33047 SDValue Chain;
33066 SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other},
33098 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
33099 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
33102 SDValue Chain;
33117 SDValue Chain;
33118 SDValue V = LowerWin64_FP_TO_INT128(SDValue(N, 0), DAG, Chain);
33125 if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) {
33134 if (SDValue V = LRINT_LLRINTHelper(N, DAG))
33147 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
33160 SDValue Res = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
33177 SDValue Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other},
33189 SDValue Zero = DAG.getConstant(0, dl, SrcVT);
33190 SDValue One = DAG.getConstant(1, dl, SrcVT);
33191 SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT,
33194 SDValue IsNeg = DAG.getSetCC(dl, MVT::v2i64, Src, Zero, ISD::SETLT);
33195 SDValue SignSrc = DAG.getSelect(dl, SrcVT, IsNeg, Sign, Src);
33196 SmallVector<SDValue, 4> SignCvts(4, DAG.getConstantFP(0.0, dl, MVT::f32));
33198 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
33207 SDValue SignCvt = DAG.getBuildVector(MVT::v4f32, dl, SignCvts);
33208 SDValue Slow, Chain;
33221 SDValue Cvt = DAG.getSelect(dl, MVT::v4f32, IsNeg, Slow, SignCvt);
33239 SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
33247 SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
33248 SDValue VBias = DAG.getConstantFP(
33250 SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
33254 SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
33256 SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl,
33263 SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
33271 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
33272 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
33273 SDValue Rnd = N->getOperand(IsStrict ? 2 : 1);
33276 SDValue V;
33278 SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32)
33320 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
33323 SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16)
33325 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext);
33371 SDValue cpInL, cpInH;
33375 Regs64bit ? X86::RAX : X86::EAX, cpInL, SDValue());
33379 SDValue swapInL, swapInH;
33392 SDValue Result;
33396 SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL,
33403 SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1),
33409 SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
33412 SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
33415 SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
33417 SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
33419 SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG);
33439 SDValue Ld = DAG.getLoad(MVT::v2i64, dl, Node->getChain(),
33441 SDValue ResL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
33443 SDValue ResH = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
33457 SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
33458 SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
33461 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
33470 SDValue Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Ld,
33481 SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
33482 SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD,
33485 SDValue Chain = Result.getValue(1);
33491 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
33495 SDValue StoreOps[] = { Chain, Result, StackPtr };
33538 SDValue Lo, Hi;
33542 SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
33553 SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64,
33567 SDValue Index = Gather->getIndex();
33573 SDValue Mask = Gather->getMask();
33575 SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT,
33585 SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
33587 SDValue Res = DAG.getMemIntrinsicNode(
33609 SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
33612 SDValue Chain = Res.getValue(1);
33623 SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
33624 SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
33631 SDValue V = LowerADDRSPACECAST(SDValue(N,0), DAG);
33640 Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
33648 SDValue VecOp = N->getOperand(0);
33650 SDValue Split = DAG.getBitcast(ExtVT, N->getOperand(0));
34239 bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
34326 bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
37014 X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
37026 auto NeedsSignExtension = [&](SDValue V, unsigned ActiveBits) {
37048 SDValue NewC =
37051 SDValue NewOp =
37100 SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT);
37101 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
37105 static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS,
37125 static void computeKnownBitsForPMADDWD(SDValue LHS, SDValue RHS,
37148 static void computeKnownBitsForPMADDUBSW(SDValue LHS, SDValue RHS,
37172 const SDValue Op, const APInt &DemandedElts, unsigned Depth,
37182 [&DAG, Depth, KnownBitsFunc](SDValue Op, APInt &DemandedEltsOp) {
37197 void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37233 SDValue Src = Op.getOperand(0);
37298 SDValue Src = Op.getOperand(0);
37299 SDValue Idx = Op.getOperand(1);
37311 SDValue Src = Op.getOperand(0);
37346 SDValue LHS = Op.getOperand(0);
37347 SDValue RHS = Op.getOperand(1);
37373 SDValue LHS = Op.getOperand(0);
37374 SDValue RHS = Op.getOperand(1);
37383 SDValue LHS = Op.getOperand(0);
37384 SDValue RHS = Op.getOperand(1);
37415 SDValue Op0 = Op.getOperand(0);
37416 SDValue Op1 = Op.getOperand(1);
37542 SDValue LHS = Op.getOperand(1);
37543 SDValue RHS = Op.getOperand(2);
37554 SDValue LHS = Op.getOperand(1);
37555 SDValue RHS = Op.getOperand(2);
37566 SDValue LHS = Op.getOperand(1);
37567 SDValue RHS = Op.getOperand(2);
37584 SmallVector<SDValue, 2> Ops;
37631 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
37642 SDValue Src = Op.getOperand(0);
37661 auto NumSignBitsPACKSS = [&](SDValue V, const APInt &Elts) -> unsigned {
37662 SDValue BC = peekThroughBitcasts(V);
37666 SDValue BC0 = peekThroughBitcasts(BC.getOperand(0));
37667 SDValue BC1 = peekThroughBitcasts(BC.getOperand(1));
37690 SDValue Src = Op.getOperand(0);
37697 SDValue Src = Op.getOperand(0);
37708 SDValue Src = Op.getOperand(0);
37753 SmallVector<SDValue, 2> Ops;
37799 SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
37806 // specified VT and memory VT. Returns SDValue() on failure.
37807 static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT,
37811 return SDValue();
37814 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
37825 SDValue V1, const SelectionDAG &DAG,
38128 SDValue &V1, SDValue &V2, const SDLoc &DL,
38275 auto computeKnownBitsElementWise = [&DAG](SDValue V) {
38324 bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2,
38454 return SDValue();
38458 SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
38459 SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);
38484 static SDValue combineX86ShuffleChainWithExtract(
38485 ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
38499 static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
38516 auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) {
38527 SDValue V1 = peekThroughBitcasts(Inputs[0]);
38528 SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType())
38536 SDValue Res;
38596 return SDValue(); // Nothing to do!
38613 ArrayRef<int> ScaledMask, SDValue V1, SDValue V2,
38617 SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
38623 SDValue Op = ScaledMask[i] >= 4 ? V2 : V1;
38628 return SDValue();
38653 return SDValue(); // Nothing to do!
38655 if (SDValue V = MatchSHUF128(ShuffleVT, DL, ScaledMask, V1, V2, DAG))
38667 return SDValue(); // Nothing to do!
38681 return SDValue(); // Nothing to do!
38682 SDValue Lo = CanonicalizeShuffleInput(RootVT, V1);
38683 SDValue Hi = CanonicalizeShuffleInput(RootVT, BaseMask[1] == 0 ? V1 : V2);
38689 return SDValue(); // Nothing to do!
38706 return SDValue(); // Nothing to do!
38717 SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
38718 SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
38760 return SDValue();
38789 return SDValue(); // Nothing to do!
38796 return SDValue(); // Nothing to do!
38809 return SDValue(); // Nothing to do!
38821 return SDValue(); // Nothing to do!
38836 SDValue SrcV1 = V1, SrcV2 = V2;
38841 return SDValue(); // Nothing to do!
38854 return SDValue(); // Nothing to do!
38864 SDValue NewV1 = V1; // Save operands in case early exit happens.
38865 SDValue NewV2 = V2;
38871 return SDValue(); // Nothing to do!
38885 return SDValue(); // Nothing to do!
38903 return SDValue(); // Nothing to do!
38913 return SDValue(); // Nothing to do!
38933 return SDValue(); // Nothing to do!
38952 return SDValue(); // Nothing to do!
38968 return SDValue();
38991 SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
39035 if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
39058 return SDValue();
39080 SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL);
39094 SmallVector<SDValue, 16> VPermIdx;
39096 SDValue Idx =
39100 SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx);
39135 SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
39150 SmallVector<SDValue, 16> PSHUFBMask;
39169 SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask);
39182 SmallVector<SDValue, 16> VPPERMMask;
39201 SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
39208 if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
39235 return SDValue();
39246 static SDValue combineX86ShuffleChainWithExtract(
39247 ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
39254 return SDValue();
39265 SDValue Input = peekThroughBitcasts(Inputs[I]);
39277 return SDValue();
39291 SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
39293 SDValue &Input = WideInputs[I];
39321 return SDValue();
39347 SDValue WideRoot = WideInputs.front().getValueSizeInBits() >
39354 if (SDValue WideShuffle =
39363 return SDValue();
39368 static SDValue canonicalizeShuffleMaskWithHorizOp(
39369 MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
39373 return SDValue();
39375 SmallVector<SDValue> BC;
39376 for (SDValue Op : Ops)
39380 SDValue BC0 = BC[0];
39383 if (VT0.getSizeInBits() != RootSizeInBits || llvm::any_of(BC, [&](SDValue V) {
39386 return SDValue();
39392 return SDValue();
39395 bool OneUseOps = llvm::all_of(Ops, [](SDValue Op) {
39423 SDValue Src0 = BC[M / 4];
39424 SDValue Src1 = Src0.getOperand((M % 4) >= 2);
39427 return SDValue();
39429 SDValue M0 = GetHOpSrc(ScaledMask[0]);
39430 SDValue M1 = GetHOpSrc(ScaledMask[1]);
39431 SDValue M2 = GetHOpSrc(ScaledMask[2]);
39432 SDValue M3 = GetHOpSrc(ScaledMask[3]);
39434 SDValue LHS = DAG.getNode(Opcode0, DL, SrcVT, M0, M1);
39435 SDValue RHS = DAG.getNode(Opcode0, DL, SrcVT, M2, M3);
39441 SDValue LHS, RHS;
39446 SDValue Src = BC[M / 4].getOperand((M % 4) >= 2);
39466 SDValue Res = DAG.getNode(Opcode0, DL, VT0, LHS, RHS);
39479 return SDValue();
39481 SDValue BC1 = BC[BC.size() - 1];
39487 auto ContainsOps = [](SDValue HOp, SDValue Op) {
39536 SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
39537 SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
39541 SDValue Undef = DAG.getUNDEF(SrcVT);
39542 SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
39564 SDValue V0 = extract128BitVector(BC[0].getOperand(M0 & 1), Idx0, DAG, DL);
39565 SDValue V1 = extract128BitVector(BC[0].getOperand(M1 & 1), Idx1, DAG, DL);
39566 SDValue Res = DAG.getNode(Opcode0, DL, HalfVT, V0, V1);
39571 return SDValue();
39577 static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
39595 return SDValue();
39602 llvm::none_of(Ops, [](SDValue SrcOp) { return SrcOp->hasOneUse(); }))
39603 return SDValue();
39656 return SDValue();
39658 SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
39699 static SDValue combineX86ShufflesRecursively(
39700 ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
39716 return SDValue();
39719 SDValue Op = SrcOps[SrcOpIndex];
39724 return SDValue(); // Bail if we hit a non-simple non-vector.
39728 return SDValue();
39756 SmallVector<SDValue, 2> OpInputs;
39763 if (llvm::any_of(OpInputs, [VT](SDValue OpInput) {
39766 return SDValue();
39770 SDValue SrcVec = Op.getOperand(0);
39778 return SDValue();
39802 SmallVector<SDValue, 16> Ops;
39834 auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
39836 SDValue InputBC = peekThroughBitcasts(Input);
39851 for (SDValue OpInput : OpInputs)
39938 SDValue &Op = Ops[I];
39954 for (SDValue &Op : Ops)
40001 if (SDValue Res = combineX86ShufflesRecursively(
40010 if (SDValue Cst = combineX86ShufflesConstants(
40016 if (Depth == 0 && llvm::all_of(Ops, [&](SDValue Op) {
40025 return SDValue();
40030 if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
40037 SDValue &Op = I.value();
40069 if (SDValue NewOp =
40079 if (any_of(Ops, [RootSizeInBits](SDValue Op) {
40082 for (SDValue &Op : Ops)
40112 if (SDValue Shuffle = combineX86ShuffleChain(
40119 SDValue LHS = peekThroughBitcasts(Ops.front());
40120 SDValue RHS = peekThroughBitcasts(Ops.back());
40126 return SDValue();
40137 static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
40150 static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
40153 SmallVector<SDValue, 2> Ops;
40192 static SDValue combineRedundantDWordShuffle(SDValue N,
40202 SmallVector<SDValue, 8> Chain;
40203 SDValue V = N.getOperand(0);
40207 return SDValue(); // Nothing combined!
40223 return SDValue();
40233 return SDValue();
40244 return SDValue();
40251 return SDValue();
40257 return SDValue(); // Nothing to combine.
40281 return SDValue();
40292 SDValue W = Chain.pop_back_val();
40322 static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
40326 return SDValue();
40329 auto commuteSHUFP = [&VT, &DL, &DAG](SDValue Parent, SDValue V) {
40331 return SDValue();
40332 SDValue N0 = V.getOperand(0);
40333 SDValue N1 = V.getOperand(1);
40338 return SDValue();
40346 if (SDValue NewSHUFP = commuteSHUFP(N, N.getOperand(0))) {
40353 SDValue N0 = N.getOperand(0);
40354 SDValue N1 = N.getOperand(1);
40357 if (SDValue NewSHUFP = commuteSHUFP(N, N0))
40360 } else if (SDValue NewSHUFP = commuteSHUFP(N, N0)) {
40363 } else if (SDValue NewSHUFP = commuteSHUFP(N, N1)) {
40371 return SDValue();
40376 static SDValue
40377 combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef<int> BlendMask,
40382 return SDValue();
40385 SDValue BC0 = peekThroughOneUseBitcasts(N0);
40386 SDValue BC1 = peekThroughOneUseBitcasts(N1);
40391 SmallVector<SDValue, 2> Ops0, Ops1;
40397 return SDValue();
40409 return SDValue();
40415 return SDValue();
40449 return SDValue();
40451 return SDValue();
40460 return SDValue();
40462 SDValue NewBlend =
40482 static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
40488 auto IsMergeableWithShuffle = [Opc, &DAG](SDValue Op, bool FoldShuf = true,
40505 auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) {
40517 SmallVector<SDValue> Ops;
40531 SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
40534 SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
40535 SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
40540 SDValue LHS, RHS;
40577 SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
40578 SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
40584 SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
40585 SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0));
40586 SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
40587 SDValue Op11 = peekThroughOneUseBitcasts(N1.getOperand(1));
40594 SDValue LHS, RHS;
40617 SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
40618 SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0));
40619 SDValue Res;
40636 return SDValue();
40640 static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
40646 SDValue Src0 = peekThroughBitcasts(V.getOperand(0));
40647 SDValue Src1 = peekThroughBitcasts(V.getOperand(1));
40654 return SDValue();
40658 SDValue LHS = Src0.getOperand(0);
40659 SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0);
40660 SDValue Res =
40678 SDValue LHS = Src0.getOperand(0);
40679 SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0);
40680 SDValue Res = DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS,
40688 return SDValue();
40692 static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
40701 if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
40707 SDValue Src = N.getOperand(0);
40712 if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::f64, MVT::v2f64, DAG)) {
40713 SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad);
40715 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
40721 return SDValue();
40724 SDValue Src = N.getOperand(0);
40725 SDValue BC = peekThroughBitcasts(Src);
40738 if (SDValue Res = combineX86ShufflesRecursively(
40793 // Ensure the same SDValue from the SDNode use is being used.
40799 return extractSubVector(SDValue(User, 0), 0, DAG, DL,
40809 SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
40810 SDValue BcastLd =
40817 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
40820 SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd,
40832 SDValue TruncIn = Src.getOperand(0);
40841 SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
40842 SDValue BcastLd = DAG.getMemIntrinsicNode(
40847 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
40859 SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
40860 SDValue BcastLd =
40864 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
40884 SDValue Ptr = DAG.getMemBasePlusOffset(
40886 SDValue Ops[] = { LN->getChain(), Ptr };
40887 SDValue BcastLd = DAG.getMemIntrinsicNode(
40893 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
40905 SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
40906 SDValue BcastLd =
40910 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
40924 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
40925 SDValue BcastLd = DAG.getMemIntrinsicNode(
40930 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
40936 return SDValue();
40939 SDValue N0 = N.getOperand(0);
40945 if (SDValue VZLoad =
40948 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
40961 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
40962 SDValue VZLoad =
40966 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
40978 SDValue In = N0.getOperand(0);
40981 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In);
40983 SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc);
40984 SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec);
41004 SDValue CP = DAG.getConstantPool(ConstantVector::get(ConstantVec), PVT);
41018 SDValue V = peekThroughOneUseBitcasts(N0);
41022 SDValue In = V.getOperand(1);
41027 SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, SubVT, In);
41034 return SDValue();
41037 SDValue N0 = N.getOperand(0);
41038 SDValue N1 = N.getOperand(1);
41065 SmallVector<SDValue> Ops;
41066 SDValue LHS = peekThroughOneUseBitcasts(N0);
41067 SDValue RHS = peekThroughOneUseBitcasts(N1);
41077 SDValue MaskLHS = LHS.getOperand(1);
41078 SDValue MaskRHS = RHS.getOperand(1);
41080 if (SDValue NewMask = combineX86ShufflesConstants(
41083 SDValue NewLHS = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT,
41085 SDValue NewRHS = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT,
41094 return SDValue();
41103 SmallVector<SDValue> Ops;
41106 SmallVector<SDValue> SubOps;
41108 SDValue Sub = peekThroughBitcasts(Ops[i]);
41128 return SDValue();
41133 SDValue N0 = N.getOperand(0);
41134 SDValue N1 = N.getOperand(1);
41138 SDValue Src = N0.getOperand(0);
41140 SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1);
41143 return SDValue();
41151 SDValue LHS = N->getOperand(0);
41152 SDValue RHS = N->getOperand(1);
41154 SmallVector<SDValue> LHSOps, RHSOps;
41155 SDValue NewLHS, NewRHS;
41171 return SDValue();
41175 SDValue LHS = N->getOperand(0);
41176 SDValue RHS = N->getOperand(1);
41189 if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL))
41196 return SDValue();
41197 SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1));
41198 SmallVector<SDValue> SubOps;
41207 return SDValue();
41210 if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
41211 if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
41218 return SDValue();
41223 SDValue N0 = N.getOperand(0);
41224 SDValue N1 = N.getOperand(1);
41226 SDValue V = peekThroughOneUseBitcasts(N0);
41238 SDValue Res = DAG.getNode(Opcode, DL, VT,
41256 SDValue N0 = N.getOperand(0);
41257 SDValue N1 = N.getOperand(1);
41265 SDValue N10 = N1.getOperand(0);
41266 SDValue N11 = N1.getOperand(1);
41272 SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
41275 SDValue Scl = DAG.getNode(Opcode1, DL, SVT, N10, N11);
41276 SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
41281 return SDValue();
41285 SDValue Op0 = N.getOperand(0);
41286 SDValue Op1 = N.getOperand(1);
41304 SmallVector<SDValue, 2> Ops1;
41325 SmallVector<SDValue, 2> Ops0;
41348 return SDValue();
41376 SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
41379 SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
41383 DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
41388 return SDValue();
41393 SDValue V1 = peekThroughBitcasts(N.getOperand(0));
41394 SDValue V2 = peekThroughBitcasts(N.getOperand(2));
41406 SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
41408 SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask,
41414 return SDValue();
41417 return SDValue();
41425 SDValue V = N.getOperand(0);
41456 SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
41485 if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DL, DAG))
41491 return SDValue();
41535 SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
41550 SDValue V1 = N->getOperand(0);
41551 SDValue V2 = N->getOperand(1);
41565 SDValue LHS, RHS;
41594 static SDValue combineShuffleToFMAddSub(SDNode *N, const SDLoc &DL,
41601 return SDValue();
41606 return SDValue();
41609 SDValue Op0 = N->getOperand(0);
41610 SDValue Op1 = N->getOperand(1);
41611 SDValue FMAdd = Op0, FMSub = Op1;
41619 return SDValue();
41625 return SDValue();
41636 static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL,
41639 if (SDValue V = combineShuffleToFMAddSub(N, DL, Subtarget, DAG))
41642 SDValue Opnd0, Opnd1;
41645 return SDValue();
41650 SDValue Opnd2;
41657 return SDValue();
41663 return SDValue();
41669 return SDValue();
41677 static SDValue combineShuffleOfConcatUndef(SDNode *N, const SDLoc &DL,
41681 return SDValue();
41687 return SDValue();
41693 return SDValue();
41695 SDValue N0 = N->getOperand(0);
41696 SDValue N1 = N->getOperand(1);
41703 return SDValue();
41714 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),
41722 static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
41725 return SDValue();
41727 return SDValue();
41732 return SDValue();
41740 return SDValue();
41752 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
41756 if (SDValue V = narrowShuffle(Shuf, DAG))
41765 if (SDValue AddSub =
41770 if (SDValue LD = combineToConsecutiveLoads(
41771 VT, SDValue(N, 0), dl, DAG, Subtarget, /*IsAfterLegalize*/ true))
41780 if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, dl, DAG, Subtarget))
41784 SDValue Op(N, 0);
41785 if (SDValue Shuffle = combineTargetShuffle(Op, dl, DAG, DCI, Subtarget))
41793 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
41800 return SDValue(N, 0);
41806 if (SDValue BinOp = canonicalizeShuffleWithOp(Op, DAG, dl))
41810 return SDValue();
41816 SDValue Op, const APInt &DemandedElts, unsigned MaskIndex,
41823 SDValue Mask = Op.getOperand(MaskIndex);
41835 SDValue BC = peekThroughOneUseBitcasts(Mask);
41873 SDValue CV = TLO.DAG.getConstantPool(ConstantVector::get(ConstVecOps), BCVT);
41874 SDValue LegalCV = LowerConstantPool(CV, TLO.DAG);
41875 SDValue NewMask = TLO.DAG.getLoad(
41883 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
41895 SDValue LHS = Op.getOperand(0);
41896 SDValue RHS = Op.getOperand(1);
41911 SDValue LHS = Op.getOperand(0);
41912 SDValue RHS = Op.getOperand(1);
41936 SDValue LHS = Op.getOperand(0);
41937 SDValue RHS = Op.getOperand(1);
41947 SDValue NewLHS = SimplifyMultipleUseDemandedVectorElts(
41949 SDValue NewRHS = SimplifyMultipleUseDemandedVectorElts(
41964 SDValue Amt = Op.getOperand(1);
41988 SDValue Src = Op.getOperand(0);
42001 if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
42014 SDValue LHS = Op.getOperand(0);
42015 SDValue RHS = Op.getOperand(1);
42036 SDValue LHS = Op.getOperand(0);
42037 SDValue RHS = Op.getOperand(1);
42047 SDValue Src = Op.getOperand(0);
42069 SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
42086 SDValue Src = Op.getOperand(0);
42108 SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
42126 SDValue LHS = Op.getOperand(0);
42127 SDValue RHS = Op.getOperand(1);
42129 auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
42172 SDValue NewLHS = SimplifyMultipleUseDemandedBits(LHS, BitsLHS, EltsLHS,
42174 SDValue NewRHS = SimplifyMultipleUseDemandedBits(RHS, BitsRHS, EltsRHS,
42189 SDValue Src = Op.getOperand(0);
42200 SDValue N0 = Op.getOperand(0);
42201 SDValue N1 = Op.getOperand(1);
42220 SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
42222 SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
42237 SDValue N0 = Op.getOperand(0);
42238 SDValue N1 = Op.getOperand(1);
42257 SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
42259 SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
42273 SDValue Src = Op.getOperand(0);
42287 if (SDValue R = combineBlendOfPermutes(
42316 SDValue Src = Op.getOperand(0);
42330 SDValue Elt = TLO.DAG.getLoad(SVT, DL, Mem->getChain(), Mem->getBasePtr(),
42332 SDValue Vec = TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Elt);
42338 SDValue Src = Op.getOperand(0);
42356 if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
42397 SDValue Src = Op.getOperand(0);
42402 SDValue Bcst = TLO.DAG.getNode(X86ISD::VBROADCAST, DL, BcstVT, Src);
42412 SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
42413 SDValue Bcst = TLO.DAG.getMemIntrinsicNode(
42416 TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
42427 SDValue Ld =
42430 TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
42438 if (SDValue BcstLd =
42458 SDValue Ext0 =
42460 SDValue ExtOp =
42462 SDValue UndefVec = TLO.DAG.getUNDEF(VT);
42463 SDValue Insert =
42475 SDValue Ext = extractSubVector(Op.getOperand(0), 2, TLO.DAG, DL, 128);
42476 SDValue UndefVec = TLO.DAG.getUNDEF(VT);
42477 SDValue Insert = insertSubVector(UndefVec, Ext, 0, TLO.DAG, DL, 128);
42492 SDValue ExtOp =
42494 SDValue UndefVec = TLO.DAG.getUNDEF(VT);
42495 SDValue Insert =
42534 SmallVector<SDValue, 4> Ops;
42536 SDValue SrcOp = Op.getOperand(i);
42547 SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops);
42548 SDValue UndefVec = TLO.DAG.getUNDEF(VT);
42549 SDValue Insert =
42565 SmallVector<SDValue, 2> OpInputs;
42572 llvm::any_of(OpInputs, [VT](SDValue V) {
42637 SDValue NewShuffle = combineX86ShufflesRecursively(
42650 SDValue Op, const APInt &OriginalDemandedBits,
42659 SDValue Src = Op.getOperand(0);
42673 SDValue LHS = Op.getOperand(0);
42674 SDValue RHS = Op.getOperand(1);
42700 SDValue Mask = TLO.DAG.getConstant(DemandedMask, DL, VT);
42705 SDValue DemandedLHS = SimplifyMultipleUseDemandedBits(
42707 SDValue DemandedRHS = SimplifyMultipleUseDemandedBits(
42719 SDValue Op0 = Op.getOperand(0);
42720 SDValue Op1 = Op.getOperand(1);
42741 SDValue Op0 = Op.getOperand(0);
42761 SDValue NewShift = TLO.DAG.getNode(
42805 SDValue Op0 = Op.getOperand(0);
42806 SDValue Op1 = Op.getOperand(1);
42821 SDValue Op00 = Op0.getOperand(0);
42853 SDValue Sel = Op.getOperand(0);
42854 SDValue LHS = Op.getOperand(1);
42855 SDValue RHS = Op.getOperand(2);
42858 SDValue NewSel = SimplifyMultipleUseDemandedBits(
42860 SDValue NewLHS = SimplifyMultipleUseDemandedBits(
42862 SDValue NewRHS = SimplifyMultipleUseDemandedBits(
42876 SDValue Vec = Op.getOperand(0);
42902 if (SDValue V = SimplifyMultipleUseDemandedBits(
42914 SDValue Vec = Op.getOperand(0);
42915 SDValue Scl = Op.getOperand(1);
42961 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
42963 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
42966 SDValue Op0 = DemandedOp0 ? DemandedOp0 : Op.getOperand(0);
42967 SDValue Op1 = DemandedOp1 ? DemandedOp1 : Op.getOperand(1);
42974 SDValue Src = Op.getOperand(0);
42988 SDValue NewSrc =
42991 SDValue NewBcst =
43005 SDValue Src = Op.getOperand(0);
43017 SDValue NewSrc = extract128BitVector(Src, 0, TLO.DAG, SDLoc(Src));
43044 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
43050 SDValue Op0 = Op.getOperand(0);
43051 SDValue Op1 = Op.getOperand(1);
43081 SDValue Op0 = Op.getOperand(0);
43082 SDValue Op1 = Op.getOperand(1);
43130 SDValue Op0 = Op.getOperand(0);
43131 SDValue Op1 = Op.getOperand(1);
43163 SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
43164 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
43174 SDValue Vec = Op.getOperand(0);
43185 SDValue Op0 = Op.getOperand(0);
43209 SDValue Cond = Op.getOperand(0);
43210 SDValue LHS = Op.getOperand(1);
43211 SDValue RHS = Op.getOperand(2);
43222 SDValue LHS = Op.getOperand(0);
43223 SDValue RHS = Op.getOperand(1);
43239 SmallVector<SDValue, 2> ShuffleOps;
43246 llvm::all_of(ShuffleOps, [VT](SDValue V) {
43284 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
43292 SmallVector<SDValue, 2> Ops;
43320 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
43342 bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
43363 static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
43405 static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src,
43409 return SDValue();
43416 SDValue Op0 = Src.getOperand(0);
43427 SDValue Op0 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(0), DL);
43428 SDValue Op1 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(1), DL);
43435 return SDValue();
43439 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
43440 SDValue Src, const SDLoc &DL) {
43470 static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
43475 return SDValue();
43480 if (SDValue V = adjustBitcastSrcVectorSSE1(DAG, Src, DL)) {
43511 return SDValue();
43515 SmallVector<SDValue, 4> SubSrcOps;
43518 SDValue LowerOp = SubSrcOps[0];
43519 ArrayRef<SDValue> UpperOps(std::next(SubSrcOps.begin()), SubSrcOps.end());
43521 all_of(UpperOps, [](SDValue Op) { return Op.isUndef(); })) {
43524 if (SDValue V = combineBitcastvxi1(DAG, SubVT, LowerOp, DL, Subtarget)) {
43545 return SDValue();
43587 return SDValue();
43596 return SDValue();
43599 SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
43619 static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) {
43628 SDValue In = Op.getOperand(Idx);
43636 static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
43642 return SDValue();
43646 return SDValue();
43649 SDValue Op = N->getOperand(0);
43653 return SDValue();
43659 return SDValue();
43666 return SDValue();
43668 SDValue LHS = Op.getOperand(0);
43669 SDValue RHS = Op.getOperand(1);
43689 return SDValue();
43692 static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
43696 SDValue Splat = BV->getSplatValue();
43699 auto CreateMMXElement = [&](SDValue V) {
43716 SmallVector<SDValue, 8> Ops;
43757 SDValue Intrin = DAG.getTargetConstant(
43772 static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL,
43777 return SDValue(); // Limit search depth.
43784 SDValue Src = V.getOperand(0);
43800 SDValue Src = V.getOperand(0);
43804 if (SDValue N0 = combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG,
43813 SDValue Src = V.getOperand(0);
43817 if (SDValue N0 = combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG,
43829 if (SDValue N0 = combineBitcastToBoolVector(VT, V.getOperand(0), DL, DAG,
43831 if (SDValue N1 = combineBitcastToBoolVector(VT, V.getOperand(1), DL, DAG,
43838 SDValue Src0 = V.getOperand(0);
43844 if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget,
43856 return SDValue(Alt, 0);
43858 return SDValue();
43861 static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
43864 SDValue N0 = N->getOperand(0);
43877 if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))
43902 SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1);
43906 SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end());
43915 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
43926 if (SDValue V =
43961 SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
43962 SDValue ResNode =
43965 DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1));
43997 SDValue Op = N0.getOperand(i);
44002 SDValue N00 = N0.getOperand(0);
44022 SDValue N00 = N0.getOperand(0);
44031 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
44063 SDValue Src = N0;
44070 SDValue MovmskIn = Src.getOperand(0);
44082 SDValue Cmp = DAG.getSetCC(dl, CmpVT, MovmskIn,
44090 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, CmpVT));
44099 if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
44113 default: return SDValue();
44123 return SDValue();
44125 SDValue LogicOp0 = N0.getOperand(0);
44126 SDValue LogicOp1 = N0.getOperand(1);
44134 SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
44143 SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
44148 return SDValue();
44152 static bool detectExtMul(SelectionDAG &DAG, const SDValue &Mul, SDValue &Op0,
44153 SDValue &Op1) {
44161 auto IsFreeTruncation = [](SDValue &Op) -> bool {
44185 static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) {
44186 SDValue AbsOp1 = Abs->getOperand(0);
44203 static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS,
44226 SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, Vi8VT));
44229 SDValue DpOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
44231 SDValue DpOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
44236 ArrayRef<SDValue> Ops) {
44241 SDValue Zero = DAG.getConstant(0, DL, DpVT);
44249 static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
44250 const SDValue &Zext1, const SDLoc &DL,
44259 SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT));
44262 SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
44264 SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
44268 ArrayRef<SDValue> Ops) {
44279 static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
44283 return SDValue();
44287 return SDValue();
44291 SDValue Src = DAG.matchBinOpReduction(
44294 return SDValue();
44299 return SDValue();
44302 SDValue MinPos = Src;
44306 SDValue Lo, Hi;
44317 SDValue Mask;
44334 SDValue Upper = DAG.getVectorShuffle(
44353 static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
44357 return SDValue();
44363 return SDValue();
44367 SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
44371 return SDValue();
44376 return SDValue();
44378 SDValue Movmsk;
44389 return SDValue();
44397 SDValue LHS = DAG.getFreeze(Match.getOperand(0));
44398 SDValue RHS = DAG.getFreeze(Match.getOperand(1));
44400 if (SDValue V = LowerVectorAllEqual(DL, LHS, RHS, CC, Mask, Subtarget,
44413 SDValue Lo, Hi;
44422 return SDValue();
44429 return SDValue();
44436 return SDValue();
44440 return SDValue();
44443 SDValue Lo, Hi;
44457 SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
44467 SDValue Result = DAG.getNode(ISD::PARITY, DL, CmpVT, Movmsk);
44471 SDValue CmpC;
44487 SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
44488 SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
44492 static SDValue combineVPDPBUSDPattern(SDNode *Extract, SelectionDAG &DAG,
44495 return SDValue();
44501 return SDValue();
44505 return SDValue();
44509 SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});
44523 return SDValue();
44526 SDValue LHS, RHS;
44528 return SDValue();
44533 SDValue DP = createVPDPBUSD(DAG, LHS, RHS, StageBias, DL, Subtarget);
44548 SDValue Shuffle =
44563 static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
44567 return SDValue();
44573 return SDValue();
44577 return SDValue();
44581 SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});
44599 return SDValue();
44602 SDValue Zext0, Zext1;
44604 return SDValue();
44608 SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL, Subtarget);
44622 SDValue Shuffle =
44645 static SDValue
44646 combineExtractFromVectorLoad(SDNode *N, EVT VecVT, SDValue SrcVec, uint64_t Idx,
44666 SDValue NewPtr = TLI.getVectorElementPointer(
44671 SDValue Load =
44678 return SDValue();
44683 static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
44687 return SDValue();
44690 SDValue Src = N->getOperand(0);
44691 SDValue Idx = N->getOperand(1);
44701 return SDValue();
44705 return SDValue();
44707 SDValue SrcBC = peekThroughBitcasts(Src);
44711 SDValue SrcOp = SrcBC.getOperand(0);
44733 SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
44738 DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
44753 SDValue Scl = SrcBC.getOperand(0);
44779 auto GetLegalExtract = [&Subtarget, &DAG, &dl](SDValue Vec, EVT VecVT,
44805 return SDValue();
44810 SmallVector<SDValue, 2> Ops;
44812 return SDValue();
44815 if (llvm::any_of(Ops, [SrcVT](SDValue Op) {
44818 return SDValue();
44853 return SDValue();
44856 return SDValue();
44872 SDValue SrcOp = Ops[ExtractIdx / Mask.size()];
44874 if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx))
44878 if (SDValue V = combineExtractFromVectorLoad(
44882 return SDValue();
44887 static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
44890 SDValue Vec = ExtElt->getOperand(0);
44891 SDValue Index = ExtElt->getOperand(1);
44898 return SDValue();
44905 return SDValue();
44909 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
44911 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
44918 return SDValue();
44932 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
44935 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
44937 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
44975 SmallVector<SDValue, 4> ExtOps;
44976 for (SDValue Op : Vec->ops())
44981 return SDValue();
44988 static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
44994 return SDValue();
44997 SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc,
45000 return SDValue();
45002 SDValue Index = ExtElt->getOperand(1);
45009 return SDValue();
45016 auto WidenToV16I8 = [&](SDValue V, bool ZeroExtend) {
45036 return SDValue();
45039 SDValue Lo = getUnpackl(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
45040 SDValue Hi = getUnpackh(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
45078 return SDValue();
45083 SDValue Lo, Hi;
45090 SDValue Hi = DAG.getVectorShuffle(
45120 ArrayRef<SDValue> Ops) {
45122 SDValue Zero = DAG.getConstant(0, DL, Ops[0].getValueType());
45130 SDValue Lo, Hi;
45138 SDValue RdxHi = DAG.getVectorShuffle(MVT::v2i64, DL, Rdx, Rdx, {1, -1});
45149 return SDValue();
45160 SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
45161 SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
45167 return SDValue();
45181 static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
45184 if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
45187 SDValue InputVector = N->getOperand(0);
45188 SDValue EltIdx = N->getOperand(1);
45219 SDValue Src = peekThroughBitcasts(InputVector);
45223 SDValue Sub = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Src,
45231 if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits),
45233 return SDValue(N, 0);
45241 SDValue Scl = InputVector.getOperand(1);
45249 return SDValue();
45270 if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
45273 if (SDValue VPDPBUSD = combineVPDPBUSDPattern(N, DAG, Subtarget))
45277 if (SDValue Cmp = combinePredicateReduction(N, DAG, Subtarget))
45281 if (SDValue MinMax = combineMinMaxReduction(N, DAG, Subtarget))
45285 if (SDValue V = combineArithReduction(N, DAG, Subtarget))
45288 if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
45292 if (SDValue V = combineExtractFromVectorLoad(
45325 if (SDValue BC =
45330 SDValue MaskIdx = DAG.getZExtOrTrunc(Use->getOperand(1), dl, MVT::i8);
45331 SDValue MaskBit = DAG.getConstant(1, dl, BCVT);
45332 SDValue Mask = DAG.getNode(ISD::SHL, dl, BCVT, MaskBit, MaskIdx);
45333 SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
45337 return SDValue(N, 0);
45344 SDValue TruncSrc = InputVector.getOperand(0);
45347 SDValue NewExt =
45353 return SDValue();
45358 static SDValue combineToExtendBoolVectorInReg(
45359 unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG,
45363 return SDValue();
45365 return SDValue();
45367 return SDValue();
45376 return SDValue();
45378 return SDValue();
45380 return SDValue();
45382 SDValue N00 = N0.getOperand(0);
45385 return SDValue();
45387 SDValue Vec;
45425 SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
45432 SmallVector<SDValue, 32> Bits;
45438 SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);
45457 static SDValue
45461 SDValue Cond = N->getOperand(0);
45462 SDValue LHS = N->getOperand(1);
45463 SDValue RHS = N->getOperand(2);
45469 return SDValue();
45493 return SDValue();
45507 SDValue CC = Cond.getOperand(2);
45520 return SDValue();
45527 return SDValue();
45531 SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
45532 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
45538 SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
45539 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
45545 SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
45546 SDValue AndN;
45556 return SDValue();
45563 static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG, const SDLoc &DL,
45567 return SDValue();
45572 return SDValue();
45575 SDValue Cond = N->getOperand(0);
45576 SDValue TVal = N->getOperand(1);
45577 SDValue FVal = N->getOperand(2);
45581 return SDValue();
45584 ArrayRef<SDValue> Ops) {
45591 static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG,
45593 SDValue Cond = N->getOperand(0);
45594 SDValue LHS = N->getOperand(1);
45595 SDValue RHS = N->getOperand(2);
45600 return SDValue();
45605 return SDValue();
45611 return SDValue();
45625 return SDValue();
45631 return SDValue();
45647 SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
45655 R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0));
45660 return SDValue();
45668 static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
45672 SDValue Cond = N->getOperand(0);
45676 return SDValue();
45693 return SDValue();
45698 return SDValue();
45701 return SDValue();
45704 return SDValue();
45707 return SDValue();
45712 return SDValue();
45714 auto OnlyUsedAsSelectCond = [](SDValue Cond) {
45732 return SDValue();
45743 SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
45745 DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
45749 return SDValue(N, 0);
45753 if (SDValue V = TLI.SimplifyMultipleUseDemandedBits(Cond, DemandedBits, DAG))
45757 return SDValue();
45776 static SDValue combineLogicBlendIntoConditionalNegate(
45777 EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
45785 return SDValue();
45787 return SDValue();
45789 auto IsNegV = [](SDNode *N, SDValue V) {
45794 SDValue V;
45800 return SDValue();
45802 SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
45803 SDValue SubOp2 = Mask;
45818 SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
45822 static SDValue commuteSelect(SDNode *N, SelectionDAG &DAG, const SDLoc &DL,
45825 return SDValue();
45827 return SDValue();
45829 SDValue Cond = N->getOperand(0);
45830 SDValue LHS = N->getOperand(1);
45831 SDValue RHS = N->getOperand(2);
45834 return SDValue();
45837 return SDValue();
45840 return SDValue();
45853 static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
45857 SDValue Cond = N->getOperand(0);
45858 SDValue LHS = N->getOperand(1);
45859 SDValue RHS = N->getOperand(2);
45863 if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS))
45870 if (SDValue V = commuteSelect(N, DAG, DL, Subtarget))
45885 if (SDValue V = combineLogicBlendIntoConditionalNegate(VT, Cond, RHS, LHS,
45905 SmallVector<SDValue, 1> LHSOps, RHSOps;
46091 SDValue AndNode = Cond.getOperand(0);
46123 auto SelectableOp = [&TLI](SDValue Op) {
46147 SDValue Res = DAG.getSelect(DL, SrcVT, Cond, LHS, RHS);
46152 if (SDValue V = combineSelectOfTwoConstants(N, DAG, DL))
46158 SDValue Cond0 = Cond.getOperand(0);
46159 SDValue Cond1 = Cond.getOperand(1);
46204 SDValue InnerSetCC = RHS.getOperand(0);
46236 SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
46247 if (SDValue ExtCond = combineToExtendBoolVectorInReg(
46292 return SDValue();
46294 if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DL, DCI, Subtarget))
46297 if (SDValue V = combineVSelectToBLENDV(N, DAG, DL, DCI, Subtarget))
46300 if (SDValue V = narrowVectorSelect(N, DAG, DL, Subtarget))
46305 if (SDValue CondNot = IsNOT(Cond, DAG))
46347 SDValue Select = DAG.getSelect(DL, IntVT, Cond, LHS, RHS);
46364 SDValue And = Cond.getOperand(0);
46368 SDValue NotCond =
46386 SDValue Mask = And.getOperand(1);
46394 SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL);
46395 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt);
46396 SDValue NewCond =
46402 return SDValue();
46411 static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
46417 return SDValue();
46423 return SDValue();
46437 SDValue CmpLHS = Cmp.getOperand(0);
46438 SDValue CmpRHS = Cmp.getOperand(1);
46442 return SDValue();
46446 return SDValue();
46448 SDValue OpRHS = CmpLHS.getOperand(2);
46451 return SDValue();
46459 return SDValue();
46509 return SDValue();
46520 return SDValue();
46522 SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG, Subtarget);
46530 static SDValue checkSignTestSetCCCombine(SDValue Cmp, X86::CondCode &CC,
46533 return SDValue();
46536 return SDValue();
46538 SDValue Src;
46542 return SDValue();
46548 return SDValue();
46558 return SDValue();
46560 return SDValue();
46577 SDValue Mask = DAG.getNode(ISD::AND, DL, SrcVT, Src,
46599 static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
46603 return SDValue();
46607 return SDValue();
46611 SDValue Op1 = Cmp.getOperand(0);
46612 SDValue Op2 = Cmp.getOperand(1);
46614 SDValue SetCC;
46624 return SDValue();
46631 return SDValue();
46675 return SDValue();
46678 SDValue Op = SetCC.getOperand(0);
46687 return SDValue();
46693 return SDValue();
46700 return SDValue();
46702 return SDValue();
46710 return SDValue();
46717 static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
46718 X86::CondCode &CC1, SDValue &Flags,
46729 SDValue SetCC0, SetCC1;
46758 static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
46762 SDValue Carry = EFLAGS.getOperand(0);
46774 SDValue CarryOp1 = Carry.getOperand(1);
46788 SDValue SubCommute =
46791 return SDValue(SubCommute.getNode(), CarryOp1.getResNo());
46802 SDValue BitNo = DAG.getConstant(0, DL, Carry.getValueType());
46812 return SDValue();
46817 static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
46823 return SDValue();
46830 SDValue Op0 = EFLAGS.getOperand(0);
46831 SDValue Op1 = EFLAGS.getOperand(1);
46836 if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
46874 if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
46887 if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
46894 SDValue BC = peekThroughBitcasts(Op0);
46922 if (SDValue Res =
46960 SDValue Src0 = peekThroughBitcasts(Op0);
46961 SDValue Src1 = peekThroughBitcasts(Op1);
46977 return SDValue();
46981 static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
46987 return SDValue();
46989 return SDValue();
46992 return SDValue();
46995 return SDValue();
46998 SDValue CmpOp = EFLAGS.getOperand(0);
47008 return SDValue();
47010 SDValue Vec = CmpOp.getOperand(0);
47021 return SDValue();
47038 SDValue BC = peekThroughBitcasts(Vec);
47058 SmallVector<SDValue> Ops;
47064 SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT,
47080 SDValue BC = peekThroughBitcasts(Vec);
47084 SDValue V = DAG.getNode(ISD::XOR, SDLoc(BC), BC.getValueType(),
47093 SDValue LHS = BC.getOperand(0);
47094 SDValue RHS = BC.getOperand(1);
47101 SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
47113 SDValue VecOp0 = Vec.getOperand(0);
47114 SDValue VecOp1 = Vec.getOperand(1);
47120 SDValue Result = DAG.getBitcast(MVT::v16i8, VecOp0);
47134 if (SDValue Src = getSplitVectorSrc(VecOp0, VecOp1, true)) {
47136 SDValue Result = peekThroughBitcasts(Src);
47139 SDValue V = DAG.getNode(ISD::XOR, DL, Result.getValueType(),
47175 SmallVector<SDValue, 2> ShuffleInputs;
47183 SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
47202 SDValue LHS = Vec;
47203 SDValue RHS = IsAnyOf ? Vec : DAG.getAllOnesConstant(DL, IntVT);
47210 return SDValue();
47216 static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
47220 if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG))
47223 if (SDValue R = checkSignTestSetCCCombine(EFLAGS, CC, DAG))
47226 if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
47229 if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG, Subtarget))
47232 if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
47239 static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
47244 SDValue FalseOp = N->getOperand(0);
47245 SDValue TrueOp = N->getOperand(1);
47247 SDValue Cond = N->getOperand(3);
47255 if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
47260 SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8),
47303 SDValue(FalseC, 0));
47343 SDValue(FalseC, 0));
47381 SDValue Ops[] = {FalseOp, Cond.getOperand(0),
47397 SDValue Cond0 = Cond.getOperand(0);
47405 SDValue NewSub =
47408 SDValue EFLAGS(NewSub.getNode(), 1);
47432 SDValue Flags;
47442 SDValue LOps[] = {FalseOp, TrueOp,
47444 SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps);
47445 SDValue Ops[] = {LCMOV, TrueOp, DAG.getTargetConstant(CC1, DL, MVT::i8),
47447 SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
47458 SDValue Add = TrueOp;
47459 SDValue Const = FalseOp;
47477 SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
47478 SDValue CMov =
47485 return SDValue();
47500 SDValue Opd = N->getOperand(i);
47553 static SDValue reduceVMULWidth(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
47558 return SDValue();
47566 return SDValue();
47570 return SDValue();
47572 SDValue N0 = N->getOperand(0);
47573 SDValue N1 = N->getOperand(1);
47577 return SDValue();
47582 SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
47583 SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
47587 SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
47596 SDValue MulHi =
47608 SDValue ResLo =
47616 SDValue ResHi =
47622 static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
47626 SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
47636 SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
47697 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
47699 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
47705 return SDValue();
47711 static SDValue combineMulToPMADDWD(SDNode *N, const SDLoc &DL,
47715 return SDValue();
47718 return SDValue();
47724 return SDValue();
47730 return SDValue();
47734 return SDValue();
47736 SDValue N0 = N->getOperand(0);
47737 SDValue N1 = N->getOperand(1);
47750 return SDValue();
47759 return SDValue();
47764 return SDValue();
47768 auto GetZeroableOp = [&](SDValue Op) {
47776 SDValue Src = Op.getOperand(0);
47791 SDValue Src = Op.getOperand(0);
47801 return SDValue();
47803 SDValue ZeroN0 = GetZeroableOp(N0);
47804 SDValue ZeroN1 = GetZeroableOp(N1);
47806 return SDValue();
47812 ArrayRef<SDValue> Ops) {
47822 static SDValue combineMulToPMULDQ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
47825 return SDValue();
47833 return SDValue();
47835 SDValue N0 = N->getOperand(0);
47836 SDValue N1 = N->getOperand(1);
47843 ArrayRef<SDValue> Ops) {
47854 ArrayRef<SDValue> Ops) {
47861 return SDValue();
47864 static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
47870 if (SDValue V = combineMulToPMADDWD(N, DL, DAG, Subtarget))
47873 if (SDValue V = combineMulToPMULDQ(N, DL, DAG, Subtarget))
47882 return SDValue();
47886 return SDValue();
47889 return SDValue();
47893 return SDValue();
47906 return SDValue();
47912 return SDValue();
47918 SDValue NewMul = SDValue();
48031 SDValue Shift1 =
48034 SDValue Shift2 =
48052 static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG,
48059 return SDValue();
48062 SDValue ShiftOperand = N->getOperand(0);
48064 return SDValue();
48069 return SDValue();
48075 return SDValue();
48077 SDValue LHS = ShiftOperand.getOperand(0);
48078 SDValue RHS = ShiftOperand.getOperand(1);
48083 return SDValue();
48092 return SDValue();
48095 SDValue Mulh = DAG.getNode(Opc, DL, MulVT, LHS, RHS);
48101 static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
48104 SDValue N0 = N->getOperand(0);
48105 SDValue N1 = N->getOperand(1);
48115 SDValue Cond = N0.getOperand(0);
48116 SDValue N00 = N0.getOperand(1);
48117 SDValue N01 = N0.getOperand(2);
48137 SDValue N00 = N0.getOperand(0);
48166 return SDValue();
48169 static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
48172 SDValue N0 = N->getOperand(0);
48173 SDValue N1 = N->getOperand(1);
48178 if (SDValue V = combineShiftToPMULH(N, DAG, DL, Subtarget))
48183 SDValue ShrAmtVal;
48206 return SDValue();
48208 SDValue N00 = N0.getOperand(0);
48209 SDValue N01 = N0.getOperand(1);
48215 return SDValue();
48217 return SDValue();
48224 SDValue NN =
48234 return SDValue();
48237 static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
48241 SDValue N0 = N->getOperand(0);
48242 SDValue N1 = N->getOperand(1);
48247 if (SDValue V = combineShiftToPMULH(N, DAG, DL, Subtarget))
48254 SDValue Cond = N0.getOperand(0);
48255 SDValue N00 = N0.getOperand(1);
48256 SDValue N01 = N0.getOperand(2);
48274 return SDValue();
48281 return SDValue();
48286 return SDValue();
48297 return SDValue();
48306 SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);
48307 SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
48310 return SDValue();
48313 static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
48320 SDValue N0 = N->getOperand(0);
48321 SDValue N1 = N->getOperand(1);
48324 SDValue BC0 =
48326 SDValue BC1 =
48335 if (SDValue BCSrc = getSplitVectorSrc(BC0, BC1, false)) {
48336 SmallVector<SDValue> ShuffleOps;
48338 SDValue Vec = peekThroughBitcasts(BCSrc);
48346 SDValue Lo, Hi;
48351 SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
48364 SmallVector<SDValue> Ops0, Ops1;
48369 all_of(Ops0, [](SDValue Op) { return Op.getValueSizeInBits() == 128; });
48373 all_of(Ops1, [](SDValue Op) { return Op.getValueSizeInBits() == 128; });
48384 SDValue LHS, RHS;
48386 auto FindShuffleOpAndIdx = [&](int M, int &Idx, ArrayRef<SDValue> Ops) {
48390 SDValue Src = Ops[M / 2];
48409 SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);
48420 SmallVector<SDValue> Ops0, Ops1;
48426 [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
48428 [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
48431 SDValue Op00 = peekThroughBitcasts(Ops0.front());
48432 SDValue Op10 = peekThroughBitcasts(Ops1.front());
48433 SDValue Op01 = peekThroughBitcasts(Ops0.back());
48434 SDValue Op11 = peekThroughBitcasts(Ops1.back());
48445 SDValue Res = DAG.getNode(Opcode, DL, VT, DAG.getBitcast(SrcVT, Op00),
48454 return SDValue();
48457 static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
48465 SDValue N0 = N->getOperand(0);
48466 SDValue N1 = N->getOperand(1);
48531 if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
48539 SDValue Not0 = N0.isUndef() ? N0 : IsNOT(N0, DAG);
48540 SDValue Not1 = N1.isUndef() ? N1 : IsNOT(N1, DAG);
48544 SDValue Pack =
48564 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32,
48573 SDValue Src0, Src1;
48601 SDValue Op(N, 0);
48602 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
48605 return SDValue();
48608 static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
48617 SDValue LHS = N->getOperand(0);
48618 SDValue RHS = N->getOperand(1);
48625 SDValue LHS0 = LHS.getOperand(0);
48626 SDValue LHS1 = LHS.getOperand(1);
48627 SDValue RHS0 = RHS.getOperand(0);
48628 SDValue RHS1 = RHS.getOperand(1);
48632 SDValue Res = DAG.getNode(LHS.getOpcode(), DL, LHS.getValueType(),
48637 SDValue NewLHS =
48640 SDValue NewRHS =
48650 if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
48653 return SDValue();
48656 static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
48663 SDValue N0 = N->getOperand(0);
48664 SDValue N1 = N->getOperand(1);
48683 if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
48684 return SDValue(N, 0);
48686 return SDValue();
48689 static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
48698 SDValue N0 = N->getOperand(0);
48699 SDValue N1 = N->getOperand(1);
48734 auto MergeShifts = [&](SDValue X, uint64_t Amt0, uint64_t Amt1) {
48758 SDValue Op(N, 0);
48759 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
48771 SDValue BC = peekThroughOneUseBitcasts(N0.getOperand(0));
48776 SDValue Src = BC.getOperand(0);
48786 auto TryConstantFold = [&](SDValue V) {
48792 return SDValue();
48816 if (SDValue C = TryConstantFold(N0))
48821 SDValue BC = peekThroughOneUseBitcasts(N0);
48825 if (SDValue RHS = TryConstantFold(BC.getOperand(1))) {
48827 SDValue LHS = DAG.getNode(Opcode, DL, VT,
48835 if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBitsPerElt),
48837 return SDValue(N, 0);
48839 return SDValue();
48842 static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
48852 SDValue Vec = N->getOperand(0);
48853 SDValue Scl = N->getOperand(1);
48854 SDValue Idx = N->getOperand(2);
48863 if (TLI.SimplifyDemandedBits(SDValue(N, 0),
48865 return SDValue(N, 0);
48870 SDValue Op(N, 0);
48871 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
48875 return SDValue();
48881 static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
48888 if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
48889 SDValue N0 = N->getOperand(0);
48890 SDValue N1 = N->getOperand(1);
48891 SDValue CMP0 = N0.getOperand(1);
48892 SDValue CMP1 = N1.getOperand(1);
48897 return SDValue();
48899 SDValue CMP00 = CMP0->getOperand(0);
48900 SDValue CMP01 = CMP0->getOperand(1);
48942 SDValue FSetCC =
48947 SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
48953 SDValue OnesOrZeroesF =
48966 SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
48968 SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64);
48974 SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
48975 SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
48977 SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
48984 return SDValue();
48988 static SDValue combineAndNotIntoANDNP(SDNode *N, SelectionDAG &DAG) {
48993 return SDValue();
48995 SDValue X, Y;
48996 SDValue N0 = N->getOperand(0);
48997 SDValue N1 = N->getOperand(1);
48999 if (SDValue Not = IsNOT(N0, DAG)) {
49002 } else if (SDValue Not = IsNOT(N1, DAG)) {
49006 return SDValue();
49019 static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
49028 return SDValue();
49030 auto GetNot = [&DAG](SDValue V) {
49037 return SDValue();
49039 SDValue IVEN = SVN->getOperand(0);
49042 return SDValue();
49045 return SDValue();
49046 SDValue Src = IVEN.getOperand(1);
49047 if (SDValue Not = IsNOT(Src, DAG)) {
49048 SDValue NotSrc = DAG.getBitcast(Src.getValueType(), Not);
49049 SDValue NotIVEN =
49055 return SDValue();
49058 SDValue X, Y;
49059 SDValue N0 = N->getOperand(0);
49060 SDValue N1 = N->getOperand(1);
49063 if (SDValue Not = GetNot(N0)) {
49066 } else if (SDValue Not = GetNot(N1)) {
49070 return SDValue();
49080 SDValue LoX, HiX;
49082 SDValue LoY, HiY;
49085 SDValue LoV = DAG.getNode(X86ISD::ANDNP, DL, SplitVT, {LoX, LoY});
49086 SDValue HiV = DAG.getNode(X86ISD::ANDNP, DL, SplitVT, {HiX, HiY});
49093 return SDValue();
49105 static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
49109 return SDValue();
49112 return SDValue();
49114 SDValue N0 = N.getOperand(0);
49115 SDValue N1 = N.getOperand(1);
49119 return SDValue();
49121 if (SDValue NN0 = PromoteMaskArithmetic(N0, DL, VT, DAG, Depth + 1))
49126 return SDValue();
49130 return SDValue();
49135 if (SDValue NN1 = PromoteMaskArithmetic(N1, DL, VT, DAG, Depth + 1))
49143 else if (SDValue Cst =
49147 return SDValue();
49159 static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
49168 SDValue Narrow = N.getOperand(0);
49172 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, 0);
49174 return SDValue();
49203 static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
49207 SDValue N0 = N->getOperand(0);
49208 SDValue N1 = N->getOperand(1);
49213 return SDValue();
49215 SDValue N00 = N0.getOperand(0);
49216 SDValue N10 = N1.getOperand(0);
49224 return SDValue();
49228 SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
49234 return SDValue();
49243 return SDValue();
49252 SDValue ZeroIndex = DAG.getVectorIdxConstant(0, DL);
49253 SDValue N01 = N0.getOperand(1);
49254 SDValue N11 = N1.getOperand(1);
49255 SDValue Vec00 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N00);
49256 SDValue Vec01 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N01);
49257 SDValue Vec10 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N10);
49258 SDValue Vec11 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N11);
49259 SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01, CC0);
49260 SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11, CC1);
49261 SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1);
49267 static SDValue combineBitOpWithMOVMSK(SDNode *N, SelectionDAG &DAG) {
49272 SDValue N0 = N->getOperand(0);
49273 SDValue N1 = N->getOperand(1);
49278 return SDValue();
49280 SDValue Vec0 = N0.getOperand(0);
49281 SDValue Vec1 = N1.getOperand(0);
49289 return SDValue();
49294 SDValue Result =
49302 static SDValue combineBitOpWithShift(SDNode *N, SelectionDAG &DAG) {
49307 SDValue N0 = N->getOperand(0);
49308 SDValue N1 = N->getOperand(1);
49313 return SDValue();
49316 SDValue BC0 = peekThroughOneUseBitcasts(N0);
49317 SDValue BC1 = peekThroughOneUseBitcasts(N1);
49322 return SDValue();
49329 return SDValue();
49332 SDValue BitOp =
49334 SDValue Shift = DAG.getNode(BCOpc, DL, BCVT, BitOp, BC0.getOperand(1));
49339 return SDValue();
49345 static SDValue combineBitOpWithPACK(SDNode *N, SelectionDAG &DAG) {
49350 SDValue N0 = N->getOperand(0);
49351 SDValue N1 = N->getOperand(1);
49356 return SDValue();
49363 return SDValue();
49367 return SDValue();
49377 return SDValue();
49380 SDValue LHS = DAG.getNode(Opc, DL, SrcVT, N0.getOperand(0), N1.getOperand(0));
49381 SDValue RHS = DAG.getNode(Opc, DL, SrcVT, N0.getOperand(1), N1.getOperand(1));
49388 static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
49390 SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
49391 SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
49394 return SDValue();
49407 SDValue X, Y;
49419 SDValue Sra =
49428 return SDValue();
49432 return SDValue();
49435 return SDValue();
49439 return SDValue();
49443 SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
49444 SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT, Op0, ShAmt);
49450 static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) {
49452 return SDValue();
49454 SDValue Base = Ld->getBasePtr();
49457 return SDValue();
49459 SDValue ShiftedIndex = Base.getOperand(0);
49462 return SDValue();
49483 static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
49490 return SDValue();
49494 SDValue N = Node->getOperand(i);
49499 return SDValue();
49504 return SDValue();
49537 SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0);
49538 SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32);
49541 SDValue Index = getIndexFromUnindexedLoad(Ld);
49543 return SDValue();
49546 SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index);
49549 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
49550 SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub);
49557 return SDValue();
49565 static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
49575 return SDValue();
49580 SDValue Src = N->getOperand(0);
49582 return SDValue();
49587 return SDValue();
49592 return SDValue();
49600 return SDValue();
49603 return SDValue();
49607 SDValue SubVec = Src.getOperand(0);
49613 return SDValue();
49617 auto IsLegalSetCC = [&](SDValue V) {
49631 return SDValue();
49636 SmallVector<SDValue, 4> Ops(Src.getNumOperands(),
49639 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT,
49645 static SDValue getBMIMatchingOp(unsigned Opc, SelectionDAG &DAG,
49646 SDValue OpMustEq, SDValue Op, unsigned Depth) {
49653 return SDValue();
49660 return SDValue();
49663 if (SDValue R =
49687 return SDValue();
49690 static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
49696 return SDValue();
49702 if (SDValue OpMatch =
49706 return SDValue();
49709 static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
49727 SDValue SetCC = N->getOperand(0);
49730 return SDValue();
49735 return SDValue();
49739 return SDValue();
49741 SDValue X = SetCC.getOperand(1);
49747 SDValue CCN = SetCC.getOperand(0);
49755 SmallVector<SDValue> Ops(BrCond->op_values());
49763 SDValue NewBrCond =
49771 static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG,
49785 return SDValue();
49787 SDValue SetCC0 = N->getOperand(0);
49788 SDValue SetCC1 = N->getOperand(1);
49791 return SDValue();
49793 auto GetCombineToOpc = [&](SDValue V) -> unsigned {
49794 SDValue Op = V.getOperand(1);
49810 return SDValue();
49817 return SDValue();
49824 SDValue SrcCC =
49828 SDValue CC1N = SetCC1.getOperand(0);
49834 SDValue CFlags = DAG.getTargetConstant(
49836 SDValue Sub = SetCC1.getOperand(1);
49840 SDValue CCMP = (NewOpc == X86ISD::CCMP)
49851 static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
49854 SDValue N0 = N->getOperand(0);
49855 SDValue N1 = N->getOperand(1);
49873 SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N0);
49874 SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N1);
49883 SmallVector<SDValue, 2> SrcOps;
49885 if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps, &SrcPartials) &&
49889 SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
49895 SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
49923 SDValue Neg = DAG.getNegative(N0.getOperand(0), dl, VT);
49927 SDValue Shift = DAG.getNode(ISD::SHL, dl, VT, Neg,
49934 if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
49937 if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
49940 if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
49943 if (SDValue R = combineBitOpWithShift(N, DAG))
49946 if (SDValue R = combineBitOpWithPACK(N, DAG))
49949 if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
49952 if (SDValue R = combineAndShuffleNot(N, DAG, Subtarget))
49956 return SDValue();
49958 if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
49961 if (SDValue R = combineAndNotIntoANDNP(N, DAG))
49964 if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
49967 if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
49979 SDValue MaskMul = DAG.getNode(ISD::AND, dl, VT, N0.getOperand(1), N1);
49987 SDValue Src = N0;
50002 SDValue BitNo = Src.getOperand(1);
50012 if (SDValue BT = getBT(Src, BitNo, dl, DAG))
50019 SDValue Op(N, 0);
50020 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
50025 auto GetDemandedMasks = [&](SDValue Op) {
50061 return SDValue(N, 0);
50064 SDValue NewN0 = TLI.SimplifyMultipleUseDemandedBits(N0, Bits0, Elts0, DAG);
50065 SDValue NewN1 = TLI.SimplifyMultipleUseDemandedBits(N1, Bits1, Elts1, DAG);
50075 SDValue BitMask = N1;
50076 SDValue SrcVec = N0.getOperand(0);
50100 if (SDValue Shuffle = combineX86ShufflesRecursively(
50110 if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
50113 return SDValue();
50117 static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
50124 return SDValue();
50126 SDValue N0 = peekThroughBitcasts(N->getOperand(0));
50127 SDValue N1 = peekThroughBitcasts(N->getOperand(1));
50129 return SDValue();
50135 return SDValue();
50143 return SDValue();
50147 return SDValue();
50152 return SDValue();
50154 return SDValue();
50165 SDValue A = DAG.getBitcast(OpVT, N0.getOperand(1));
50166 SDValue B = DAG.getBitcast(OpVT, N0.getOperand(0));
50167 SDValue C = DAG.getBitcast(OpVT, N1.getOperand(0));
50168 SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
50169 SDValue Res = getAVX512Node(X86ISD::VPTERNLOG, DL, OpVT, {A, B, C, Imm},
50174 SDValue X = N->getOperand(0);
50175 SDValue Y =
50182 static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
50186 SDValue N0 = N->getOperand(0);
50187 SDValue N1 = N->getOperand(1);
50221 static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
50228 return SDValue();
50230 SDValue X, Y, Mask;
50232 return SDValue();
50244 return SDValue();
50249 if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL,
50255 return SDValue();
50259 return SDValue();
50276 static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) {
50277 SDValue Cmp = Op.getOperand(1);
50281 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0));
50284 SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
50285 SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
50297 static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
50301 return SDValue();
50303 auto isORCandidate = [](SDValue N) {
50312 return SDValue();
50315 auto isSetCCCandidate = [](SDValue N) {
50324 SDValue LHS = OR->getOperand(0);
50325 SDValue RHS = OR->getOperand(1);
50339 !isORCandidate(SDValue(OR, 0)))
50340 return SDValue();
50347 SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG);
50348 SDValue Ret, NewRHS;
50353 return SDValue();
50365 return SDValue();
50372 static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
50373 SDValue And1_L, SDValue And1_R,
50376 return SDValue();
50377 SDValue NotOp = And0_L->getOperand(0);
50381 return SDValue();
50386 SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
50387 SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
50388 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
50389 SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
50398 static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
50402 SDValue N0 = Node->getOperand(0);
50404 return SDValue();
50405 SDValue N1 = Node->getOperand(1);
50407 return SDValue();
50410 SDValue N00 = N0->getOperand(0);
50411 SDValue N01 = N0->getOperand(1);
50412 SDValue N10 = N1->getOperand(0);
50413 SDValue N11 = N1->getOperand(1);
50414 if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
50416 if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
50418 if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
50420 if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
50422 return SDValue();
50429 static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
50430 SDValue X, SDValue Y,
50434 return SDValue();
50441 SDValue EFLAGS;
50451 return SDValue();
50475 SDValue NewSub = DAG.getNode(
50478 SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
50495 return SDValue();
50507 SDValue NewSub =
50510 SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
50537 SDValue NewSub =
50540 SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
50548 return SDValue();
50553 return SDValue();
50555 SDValue Z = EFLAGS.getOperand(0);
50567 SDValue Zero = DAG.getConstant(0, DL, ZVT);
50569 SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
50572 SDValue(Neg.getNode(), 1));
50581 SDValue One = DAG.getConstant(1, DL, ZVT);
50583 SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
50591 SDValue One = DAG.getConstant(1, DL, ZVT);
50593 SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
50613 static SDValue combineAddOrSubToADCOrSBB(SDNode *N, const SDLoc &DL,
50616 SDValue X = N->getOperand(0);
50617 SDValue Y = N->getOperand(1);
50620 if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG))
50624 if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, Y, X, DAG)) {
50630 return SDValue();
50633 static SDValue combineOrXorWithSETCC(SDNode *N, SDValue N0, SDValue N1,
50652 if (SDValue R = combineAddOrSubToADCOrSBB(IsSub, DL, VT, N1, N0, DAG))
50679 return SDValue();
50682 static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
50685 SDValue N0 = N->getOperand(0);
50686 SDValue N1 = N->getOperand(1);
50702 SmallVector<SDValue, 2> SrcOps;
50704 if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) &&
50708 SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
50714 SDValue ZeroBits = DAG.getConstant(0, dl, MaskVT);
50715 SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
50722 if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
50725 if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
50728 if (SDValue R = combineBitOpWithShift(N, DAG))
50731 if (SDValue R = combineBitOpWithPACK(N, DAG))
50734 if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
50738 return SDValue();
50740 if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
50743 if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
50746 if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
50753 SDValue Cond = N0.getOperand(1);
50763 SDValue NotCond = getSETCC(CCode, Cond.getOperand(1), SDLoc(Cond), DAG);
50765 SDValue R = DAG.getZExtOrTrunc(NotCond, dl, VT);
50802 SDValue Op(N, 0);
50803 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
50808 auto SimplifyUndemandedElts = [&](SDValue Op, SDValue OtherOp) {
50826 return SDValue(N, 0);
50832 if (SDValue R = foldMaskedMerge(N, DAG))
50835 if (SDValue R = combineOrXorWithSETCC(N, N0, N1, DAG))
50838 return SDValue();
50845 static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
50849 return SDValue();
50851 SDValue N0 = N->getOperand(0);
50852 SDValue N1 = N->getOperand(1);
50856 return SDValue();
50860 return SDValue();
50863 SDValue Shift = N0.getOperand(0);
50865 return SDValue();
50870 return SDValue();
50875 return SDValue();
50881 SDValue ShiftOp = Shift.getOperand(0);
50886 SDValue Cond = DAG.getSetCC(DL, SetCCResultType, ShiftOp,
50900 static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
50904 return SDValue();
50908 default: return SDValue();
50912 case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break;
50916 case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break;
50922 SDValue Shift = N->getOperand(0);
50923 SDValue Ones = N->getOperand(1);
50926 return SDValue();
50933 return SDValue();
50943 /// Return the source value x to be truncated or SDValue() if the pattern was
50954 /// So return the smax(x, C1) value to be truncated or SDValue() if the
50956 static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
50965 auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
50969 return SDValue();
50973 if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2))
50979 if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2))
50984 if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1))
50985 if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2))
50991 return SDValue();
51001 /// Return the source value to be truncated or SDValue() if the pattern was not
51003 static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
51008 auto MatchMinMax = [](SDValue V, unsigned Opcode,
51009 const APInt &Limit) -> SDValue {
51014 return SDValue();
51026 if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax))
51027 if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin))
51030 if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin))
51031 if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax))
51034 return SDValue();
51037 static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
51041 return SDValue();
51053 if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
51055 SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
51078 if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
51082 SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
51085 SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
51093 if (SDValue SSatVal = detectSSatPattern(In, VT))
51103 SDValue SatVal;
51104 if (SDValue SSatVal = detectSSatPattern(In, VT)) {
51107 } else if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL)) {
51118 SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(InVT));
51128 SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal);
51134 return SDValue();
51137 static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl,
51143 SDValue Ptr = Ld->getBasePtr();
51144 SDValue Chain = Ld->getChain();
51148 return SDValue();
51151 return SDValue();
51155 return SDValue();
51180 SDValue UserPtr = UserLd->getBasePtr();
51193 if (getTargetConstantBitsFromNode(SDValue(N, 0), NumBits, Undefs,
51195 getTargetConstantBitsFromNode(SDValue(User, 0), NumBits,
51198 SDValue Extract = extractSubVector(
51199 SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());
51201 return DCI.CombineTo(N, Extract, SDValue(User, 1));
51209 return SDValue();
51212 static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
51235 return SDValue();
51238 SDValue Ptr1 = Ld->getBasePtr();
51239 SDValue Ptr2 =
51243 SDValue Load1 =
51247 SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2,
51251 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
51254 SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
51265 SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
51269 SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad);
51278 SDValue Ptr = Ld->getBasePtr();
51279 SDValue Chain = Ld->getChain();
51289 SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, dl,
51292 return DCI.CombineTo(N, Extract, SDValue(User, 1));
51297 if (SDValue V = combineConstantPoolLoads(Ld, dl, DAG, DCI, Subtarget))
51306 SDValue Cast =
51314 return SDValue();
51320 static int getOneTrueElt(SDValue V) {
51335 const SDValue &Op = BV->getOperand(i);
51356 SelectionDAG &DAG, SDValue &Addr,
51357 SDValue &Index, Align &Alignment,
51384 static SDValue
51393 SDValue Addr, VecIndex;
51397 return SDValue();
51411 SDValue Load =
51416 SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());
51419 SDValue Insert =
51425 static SDValue
51430 return SDValue();
51443 SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
51445 SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
51457 return SDValue();
51460 return SDValue();
51464 SDValue NewML = DAG.getMaskedLoad(
51468 SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
51474 static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
51481 return SDValue();
51484 if (SDValue ScalarLoad =
51490 if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
51496 SDValue Mask = Mld->getMask();
51504 return SDValue(N, 0);
51506 if (SDValue NewMask =
51514 return SDValue();
51521 static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
51528 SDValue Addr, VecIndex;
51532 return SDValue();
51536 SDValue Value = MS->getValue();
51544 SDValue Extract =
51553 static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
51558 return SDValue();
51565 return SDValue();
51567 if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
51572 SDValue Mask = Mst->getMask();
51578 return SDValue(N, 0);
51580 if (SDValue NewMask =
51588 SDValue Value = Mst->getValue();
51598 return SDValue();
51601 static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
51607 SDValue StoredVal = St->getValue();
51628 SDValue Val = StoredVal.getOperand(0);
51642 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
51656 SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
51659 SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl,
51663 SDValue Ptr0 = St->getBasePtr();
51664 SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, TypeSize::getFixed(4), dl);
51666 SDValue Ch0 =
51670 SDValue Ch1 =
51693 return SDValue();
51706 return SDValue();
51727 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
51747 auto IsExtractedElement = [](SDValue V) {
51755 return SDValue();
51757 if (SDValue Extract = IsExtractedElement(StoredVal)) {
51758 SDValue Trunc = peekThroughOneUseBitcasts(Extract);
51760 SDValue Src = Trunc.getOperand(0);
51780 if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT()))
51784 if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(),
51791 return SDValue();
51800 SDValue Cast =
51816 return SDValue();
51824 return SDValue();
51832 return SDValue();
51836 return SDValue();
51841 SDValue NewLd = DAG.getLoad(MVT::f64, LdDL, Ld->getChain(),
51858 SDValue OldExtract = St->getOperand(1);
51859 SDValue ExtOp0 = OldExtract.getOperand(0);
51862 SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0);
51863 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
51870 return SDValue();
51873 static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
51878 SDValue StoredVal = N->getOperand(1);
51890 return SDValue(N, 0);
51893 return SDValue();
51909 static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
51932 auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1,
51941 SmallVector<SDValue, 2> SrcOps;
51943 SDValue BC = peekThroughBitcasts(Op);
51945 !isAnyZero(SrcMask) && all_of(SrcOps, [BC](SDValue Op) {
51951 N0 = !SrcOps.empty() ? SrcOps[0] : SDValue();
51952 N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
51968 // NOTE: A default initialized SDValue represents an UNDEF of type VT.
51969 SDValue A, B;
51975 SDValue C, D;
51998 B = SDValue();
52000 A = SDValue();
52003 D = SDValue();
52005 C = SDValue();
52061 SDValue NewLHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
52062 SDValue NewRHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
52097 static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
52116 SDValue LHS = N->getOperand(0);
52117 SDValue RHS = N->getOperand(1);
52121 SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
52133 SDValue LHS = N->getOperand(0);
52134 SDValue RHS = N->getOperand(1);
52139 ArrayRef<SDValue> Ops) {
52142 SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
52153 return SDValue();
52169 static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
52172 SDValue LHS = N->getOperand(0);
52173 SDValue RHS = N->getOperand(1);
52176 auto combineConjugation = [&](SDValue &r) {
52178 SDValue XOR = LHS.getOperand(0);
52189 SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));
52190 SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);
52199 SDValue Res;
52210 static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
52221 auto IsVectorAllNegativeZero = [&DAG](SDValue Op) {
52230 return SDValue();
52234 return SDValue();
52236 SDValue LHS = N->getOperand(0);
52237 SDValue RHS = N->getOperand(1);
52239 SDValue FAddOp1, MulOp0, MulOp1;
52242 &HasNoSignedZero](SDValue N) -> bool {
52245 SDValue Op0 = N.getOperand(0);
52272 return SDValue();
52279 SDValue CFmul =
52285 static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
52287 if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget))
52290 if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget))
52293 return SDValue();
52296 static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
52299 SDValue Src = N->getOperand(0);
52305 return SDValue();
52317 static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
52321 SDValue Src = N->getOperand(0);
52328 auto IsFreeTruncation = [VT](SDValue Op) {
52347 auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
52348 SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
52349 SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
52355 return SDValue();
52360 return SDValue();
52379 SDValue Op0 = Src.getOperand(0);
52380 SDValue Op1 = Src.getOperand(1);
52388 return SDValue();
52398 static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
52403 return SDValue();
52406 return SDValue();
52411 return SDValue();
52416 return SDValue();
52422 return SDValue();
52424 SDValue LHS = Src.getOperand(0).getOperand(0);
52425 SDValue RHS = Src.getOperand(0).getOperand(1);
52431 auto IsSext = [&DAG](SDValue V) {
52434 auto IsZext = [&DAG](SDValue V) {
52441 return SDValue();
52461 SDValue Res = DAG.getNode(ISD::MULHU, DL, BCVT, DAG.getBitcast(BCVT, LHS),
52482 static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
52486 return SDValue();
52491 return SDValue();
52493 SDValue SSatVal = detectSSatPattern(In, VT);
52495 return SDValue();
52499 SDValue N0 = SSatVal.getOperand(0);
52500 SDValue N1 = SSatVal.getOperand(1);
52503 return SDValue();
52505 SDValue N00 = N0.getOperand(0);
52506 SDValue N01 = N0.getOperand(1);
52507 SDValue N10 = N1.getOperand(0);
52508 SDValue N11 = N1.getOperand(1);
52522 return SDValue();
52535 return SDValue();
52542 return SDValue();
52552 SDValue ZExtIn, SExtIn;
52554 SDValue N00Elt = N00.getOperand(i);
52555 SDValue N01Elt = N01.getOperand(i);
52556 SDValue N10Elt = N10.getOperand(i);
52557 SDValue N11Elt = N11.getOperand(i);
52563 return SDValue();
52569 return SDValue();
52582 return SDValue();
52583 SDValue N00In = N00Elt.getOperand(0);
52584 SDValue N01In = N01Elt.getOperand(0);
52585 SDValue N10In = N10Elt.getOperand(0);
52586 SDValue N11In = N11Elt.getOperand(0);
52594 return SDValue();
52597 auto ExtractVec = [&DAG, &DL, NumElems](SDValue &Ext) {
52609 ArrayRef<SDValue> Ops) {
52624 static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
52627 SDValue Src = N->getOperand(0);
52631 if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
52635 if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
52639 if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
52643 if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))
52649 SDValue BCSrc = Src.getOperand(0);
52659 return SDValue();
52662 static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
52665 SDValue In = N->getOperand(0);
52668 if (SDValue SSatVal = detectSSatPattern(In, VT))
52670 if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL))
52675 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
52676 return SDValue(N, 0);
52678 return SDValue();
52690 static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
52696 return SDValue();
52700 SDValue Op = peekThroughBitcasts(SDValue(N, 0));
52705 return SDValue();
52713 return SDValue();
52714 if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1))
52723 SDValue InsVector = Op.getOperand(0);
52724 SDValue InsVal = Op.getOperand(1);
52726 return SDValue();
52727 if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1))
52736 SDValue Op1 = Op.getOperand(1);
52737 SDValue Op0 = Op.getOperand(0);
52755 return SDValue();
52766 return SDValue();
52836 static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
52840 SDValue Arg = isFNEG(DAG, N);
52842 return SDValue();
52851 return SDValue();
52858 SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
52859 SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
52866 if (SDValue NegArg =
52870 return SDValue();
52873 SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
52879 if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) {
52909 SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
52929 if (SDValue NegOp0 =
52940 static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
52945 return SDValue();
52953 SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
52954 SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
52965 SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
52971 static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) {
52973 return SDValue();
52975 SDValue LHS = N->getOperand(0);
52977 return SDValue();
52985 static SDValue combineXorSubCTLZ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
52990 return SDValue();
52995 return SDValue();
52997 SDValue N0 = N->getOperand(0);
52998 SDValue N1 = N->getOperand(1);
53002 return SDValue();
53004 SDValue OpCTLZ;
53005 SDValue OpSizeTM1;
53011 return SDValue();
53018 return SDValue();
53021 return SDValue();
53024 return SDValue();
53026 SDValue Op = OpCTLZ.getOperand(0);
53041 static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
53044 SDValue N0 = N->getOperand(0);
53045 SDValue N1 = N->getOperand(1);
53057 if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
53060 if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
53063 if (SDValue R = combineBitOpWithShift(N, DAG))
53066 if (SDValue R = combineBitOpWithPACK(N, DAG))
53069 if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
53072 if (SDValue R = combineXorSubCTLZ(N, DL, DAG, Subtarget))
53076 return SDValue();
53078 if (SDValue SetCC = foldXor1SetCC(N, DAG))
53081 if (SDValue R = combineOrXorWithSETCC(N, N0, N1, DAG))
53084 if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
53114 SDValue TruncExtSrc = N0.getOperand(0);
53118 SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT);
53119 SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT);
53125 if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
53131 static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG,
53134 SDValue N0 = N->getOperand(0);
53139 SDValue Src = N0.getOperand(0);
53149 SDValue Rev =
53155 return SDValue();
53159 static SDValue combineAVG(SDNode *N, SelectionDAG &DAG,
53163 SDValue N0 = N->getOperand(0);
53164 SDValue N1 = N->getOperand(1);
53173 SDValue SignMask = DAG.getConstant(SignBit, DL, VT);
53180 return SDValue();
53183 static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
53194 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
53195 return SDValue(N, 0);
53197 return SDValue();
53200 static bool isNullFPScalarOrVectorConst(SDValue V) {
53210 static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG,
53213 return SDValue();
53221 static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG,
53223 SDValue N0 = N->getOperand(0);
53224 SDValue N1 = N->getOperand(1);
53232 return SDValue();
53234 auto isAllOnesConstantFP = [](SDValue V) {
53249 return SDValue();
53253 static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,
53256 if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))
53260 if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
53263 if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget))
53270 static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
53277 if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
53284 static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
53297 if (SDValue NewVal = combineFneg(N, DAG, DCI, Subtarget))
53304 static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
53310 return SDValue();
53325 static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
53329 return SDValue();
53337 return SDValue();
53339 SDValue Op0 = N->getOperand(0);
53340 SDValue Op1 = N->getOperand(1);
53359 return SDValue();
53383 SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
53384 SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO);
53391 static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
53397 if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
53398 return SDValue(N, 0);
53401 SDValue In = N->getOperand(0);
53410 if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
53412 SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
53415 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
53417 return SDValue(N, 0);
53421 return SDValue();
53424 static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
53430 SDValue In = N->getOperand(IsStrict ? 1 : 0);
53439 if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
53442 SDValue Convert =
53447 SDValue Convert =
53451 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
53453 return SDValue(N, 0);
53457 return SDValue();
53461 static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
53464 SDValue N0 = N->getOperand(0);
53465 SDValue N1 = N->getOperand(1);
53489 if (SDValue Not = IsNOT(N0, DAG))
53495 if (SDValue Not = IsNOT(N1, DAG))
53518 SDValue BC0 = peekThroughOneUseBitcasts(N0);
53522 SDValue Not = getConstVector(EltBits0, VT, DAG, DL);
53530 SDValue Op(N, 0);
53531 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
53536 auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
53572 return SDValue(N, 0);
53576 return SDValue();
53579 static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
53581 SDValue N1 = N->getOperand(1);
53589 return SDValue(N, 0);
53592 return SDValue();
53595 static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG,
53598 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
53606 return SDValue(N, 0);
53612 if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::i64, MVT::v2i64, DAG)) {
53615 SDValue Convert = DAG.getNode(
53620 SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32,
53625 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
53627 return SDValue(N, 0);
53632 return SDValue();
53636 static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
53641 SDValue N0 = N->getOperand(0);
53642 SDValue N1 = N->getOperand(1);
53646 return SDValue();
53649 SDValue IntermediateBitwidthOp;
53658 return SDValue();
53660 SDValue CMovOp0 = N0.getOperand(0);
53661 SDValue CMovOp1 = N0.getOperand(1);
53666 return SDValue();
53688 SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, CMovVT, CMovOp0, CMovOp1,
53697 static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
53701 if (SDValue V = combineSextInRegCmov(N, DAG))
53705 SDValue N0 = N->getOperand(0);
53706 SDValue N1 = N->getOperand(1);
53717 SDValue N00 = N0.getOperand(0);
53723 return SDValue();
53727 if (SDValue Promote = PromoteMaskArithmetic(N0, dl, DAG, Subtarget))
53731 SDValue Tmp =
53736 return SDValue();
53744 static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
53748 return SDValue();
53753 return SDValue();
53755 SDValue Add = Ext->getOperand(0);
53757 return SDValue();
53759 SDValue AddOp0 = Add.getOperand(0);
53760 SDValue AddOp1 = Add.getOperand(1);
53770 return SDValue();
53777 return SDValue();
53792 return SDValue();
53796 SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0);
53797 SDValue NewConstant = DAG.getConstant(AddC, SDLoc(Add), VT);
53819 static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) {
53820 SDValue CMovN = Extend->getOperand(0);
53822 return SDValue();
53829 SDValue CMovOp0 = CMovN.getOperand(0);
53830 SDValue CMovOp1 = CMovN.getOperand(1);
53834 return SDValue();
53838 return SDValue();
53843 return SDValue();
53854 SDValue Res = DAG.getNode(X86ISD::CMOV, DL, ExtendVT, CMovOp0, CMovOp1,
53866 static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
53868 SDValue N0 = N->getOperand(0);
53874 return SDValue();
53880 return SDValue();
53884 return SDValue();
53888 return SDValue();
53894 return SDValue();
53900 return SDValue();
53902 SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
53910 static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
53913 SDValue N0 = N->getOperand(0);
53920 SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
53926 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
53931 return SDValue(N, 0);
53934 if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
53938 return SDValue();
53940 if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
53943 if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), DL, VT, N0,
53948 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
53955 if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
53958 return SDValue();
53967 static SDValue getInvertedVectorForFMA(SDValue V, SelectionDAG &DAG) {
53977 return SDValue();
53979 SmallVector<SDValue, 8> Ops;
53982 for (const SDValue &Op : V->op_values()) {
53993 return SDValue();
53998 return SDValue(NV, 0);
54004 for (const SDValue &Op : V->op_values()) {
54007 return SDValue();
54011 return SDValue(NV, 0);
54014 static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
54024 return SDValue();
54026 SDValue A = N->getOperand(IsStrict ? 1 : 0);
54027 SDValue B = N->getOperand(IsStrict ? 2 : 1);
54028 SDValue C = N->getOperand(IsStrict ? 3 : 2);
54035 SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags);
54043 return SDValue();
54045 auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
54048 if (SDValue NegV = TLI.getCheaperNegatedExpression(V, DAG, LegalOperations,
54057 SDValue Vec = V.getOperand(0);
54058 if (SDValue NegV = TLI.getCheaperNegatedExpression(
54067 if (SDValue NegV = getInvertedVectorForFMA(V, DAG)) {
54082 return SDValue();
54102 static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
54110 SDValue N2 = N->getOperand(2);
54112 SDValue NegN2 =
54115 return SDValue();
54125 static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
54129 SDValue N0 = N->getOperand(0);
54136 SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
54142 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
54147 return SDValue(N, 0);
54150 if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
54154 if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
54157 if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), dl, VT, N0,
54162 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), dl, DAG, Subtarget))
54165 if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
54168 if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
54174 SDValue N00 = N0.getOperand(0);
54175 SDValue N01 = N0.getOperand(1);
54184 return SDValue();
54190 static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS,
54191 SDValue RHS, ISD::CondCode CC,
54198 SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
54201 return SDValue();
54204 static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
54208 const SDValue LHS = N->getOperand(0);
54209 const SDValue RHS = N->getOperand(1);
54215 if (SDValue V = combineVectorSizedSetCCEquality(VT, LHS, RHS, CC, DL, DAG,
54221 if (SDValue V =
54229 auto MatchOrCmpEq = [&](SDValue N0, SDValue N1) {
54238 return SDValue();
54240 if (SDValue AndN = MatchOrCmpEq(LHS, RHS))
54242 if (SDValue AndN = MatchOrCmpEq(RHS, LHS))
54247 auto MatchAndCmpEq = [&](SDValue N0, SDValue N1) {
54256 return SDValue();
54258 if (SDValue AndN = MatchAndCmpEq(LHS, RHS))
54260 if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
54292 SDValue BaseOp = LHS.getOperand(0);
54293 SDValue SETCC0 = DAG.getSetCC(DL, VT, BaseOp, RHS, CC);
54294 SDValue SETCC1 = DAG.getSetCC(
54308 SDValue Op0 = LHS;
54309 SDValue Op1 = RHS;
54359 SDValue LHSOut = LHS;
54360 SDValue RHSOut = RHS;
54365 if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ true,
54368 else if (SDValue NewRHS = incDecVectorConstant(
54381 if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ false,
54384 else if (SDValue NewRHS = incDecVectorConstant(RHS, DAG, /*IsInc*/ true,
54399 if (SDValue R = truncateAVX512SetCCNoBWI(VT, OpVT, LHSOut, RHSOut,
54407 if (SDValue R =
54433 SDValue AddC = LHS.getOperand(1);
54437 SDValue C0 = SDValue();
54438 SDValue C1 = SDValue();
54465 SDValue NewLHS =
54467 SDValue NewRHS =
54478 return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);
54487 return DAG.getSetCC(DL, VT, LHS, SDValue(FNeg, 0), CC);
54490 return SDValue();
54493 static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
54496 SDValue Src = N->getOperand(0);
54526 if (SDValue NotSrc = IsNOT(Src, DAG)) {
54560 SDValue ShiftLHS = Src.getOperand(0);
54561 SDValue ShiftRHS = Src.getOperand(1);
54574 SDValue Res = DAG.getNode(ISD::XOR, DL, SrcVT, ShiftLHS, ShiftRHS);
54581 SDValue SrcBC = peekThroughOneUseBitcasts(Src);
54593 SDValue NewSrc = DAG.getBitcast(SrcVT, SrcBC.getOperand(0));
54594 SDValue NewMovMsk = DAG.getNode(X86ISD::MOVMSK, DL, VT, NewSrc);
54604 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
54605 return SDValue(N, 0);
54607 return SDValue();
54610 static SDValue combineTESTP(SDNode *N, SelectionDAG &DAG,
54619 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
54620 return SDValue(N, 0);
54622 return SDValue();
54625 static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
54628 SDValue Mask = MemOp->getMask();
54637 return SDValue(N, 0);
54641 return SDValue();
54644 static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS,
54645 SDValue Index, SDValue Base, SDValue Scale,
54650 SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(),
54659 SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),
54668 static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
54672 SDValue Index = GorS->getIndex();
54673 SDValue Base = GorS->getBasePtr();
54674 SDValue Scale = GorS->getScale();
54738 SDValue Splat = DAG.getSplatBuildVector(Index.getValueType(), DL, Base);
54764 SDValue Mask = GorS->getMask();
54770 return SDValue(N, 0);
54774 return SDValue();
54778 static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
54782 SDValue EFLAGS = N->getOperand(1);
54785 if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget))
54788 return SDValue();
54792 static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
54795 SDValue EFLAGS = N->getOperand(3);
54801 if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) {
54802 SDValue Cond = DAG.getTargetConstant(CC, DL, MVT::i8);
54807 return SDValue();
54811 static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
54827 SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
54831 return SDValue();
54840 return SDValue();
54847 SDValue SourceConst;
54850 {N->getOperand(0), SDValue(BV, 0)});
54852 SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
54854 SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
54855 SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0),
54857 SDValue Res = DAG.getBitcast(VT, NewAnd);
54863 return SDValue();
54869 static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) {
54873 SDValue Trunc = N->getOperand(0);
54875 return SDValue();
54877 SDValue ExtElt = Trunc.getOperand(0);
54880 return SDValue();
54887 return SDValue();
54894 SDValue BitcastVec = DAG.getBitcast(BitcastVT, ExtElt.getOperand(0));
54896 SDValue NewExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TruncVT,
54901 static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
54904 SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
54920 return SDValue();
54928 SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
54942 SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
54962 return SDValue();
54965 static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
54971 if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG))
54975 SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
54991 return SDValue();
54999 SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
55013 SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
55032 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
55041 SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
55042 SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
55059 return SDValue();
55064 return SDValue();
55068 std::pair<SDValue, SDValue> Tmp =
55078 return SDValue();
55080 if (SDValue V = combineToFPTruncExtElt(N, DAG))
55083 return SDValue();
55086 static bool needCarryOrOverflowFlag(SDValue Flags) {
55121 static bool onlyZeroFlagUsed(SDValue Flags) {
55148 static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
55153 return SDValue();
55160 SDValue Op = N->getOperand(0);
55164 if (SDValue CMP =
55165 combineX86SubCmpForFlags(N, SDValue(N, 0), DAG, DCI, Subtarget))
55173 onlyZeroFlagUsed(SDValue(N, 0))) {
55194 Op.hasOneUse() && onlyZeroFlagUsed(SDValue(N, 0))) {
55195 SDValue Src = Op.getOperand(0);
55199 SDValue BoolVec = Src.getOperand(0);
55221 if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
55222 SDValue Src = Op.getOperand(0);
55231 return SDValue();
55233 SDValue Trunc = Op;
55243 onlyZeroFlagUsed(SDValue(N, 0))) {
55250 return SDValue();
55254 default: return SDValue();
55259 return SDValue();
55266 if (needCarryOrOverflowFlag(SDValue(N, 0)))
55267 return SDValue();
55272 if (needCarryOrOverflowFlag(SDValue(N, 0)))
55273 return SDValue();
55279 SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
55280 SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));
55295 static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
55302 SDValue LHS = N->getOperand(0);
55303 SDValue RHS = N->getOperand(1);
55309 if (SDValue CMP = combineX86SubCmpForFlags(N, SDValue(N, 1), DAG, DCI, ST))
55314 SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
55319 auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
55320 SDValue Ops[] = {N0, N1};
55323 SDValue Op(N, 0);
55338 static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
55339 SDValue LHS = N->getOperand(0);
55340 SDValue RHS = N->getOperand(1);
55341 SDValue BorrowIn = N->getOperand(2);
55343 if (SDValue Flags = combineCarryThroughADD(BorrowIn, DAG)) {
55356 return SDValue();
55360 static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
55362 SDValue LHS = N->getOperand(0);
55363 SDValue RHS = N->getOperand(1);
55364 SDValue CarryIn = N->getOperand(2);
55379 SDValue(N, 1).use_empty()) {
55382 SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
55383 SDValue Res1 = DAG.getNode(
55402 if (SDValue Flags = combineCarryThroughADD(CarryIn, DAG)) {
55415 return SDValue();
55418 static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
55433 return SDValue();
55437 return SDValue();
55442 return SDValue();
55455 SDValue Mul;
55457 SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i),
55464 return SDValue();
55470 return SDValue();
55485 return SDValue();
55493 return SDValue();
55498 return SDValue();
55505 return SDValue();
55509 SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(0));
55510 SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(1));
55513 ArrayRef<SDValue> Ops) {
55526 static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
55530 return SDValue();
55533 return SDValue();
55538 return SDValue();
55540 SDValue N00 = N0.getOperand(0);
55541 SDValue N01 = N0.getOperand(1);
55542 SDValue N10 = N1.getOperand(0);
55543 SDValue N11 = N1.getOperand(1);
55551 return SDValue();
55563 return SDValue();
55570 return SDValue();
55578 SDValue In0, In1;
55580 SDValue N00Elt = N00.getOperand(i);
55581 SDValue N01Elt = N01.getOperand(i);
55582 SDValue N10Elt = N10.getOperand(i);
55583 SDValue N11Elt = N11.getOperand(i);
55589 return SDValue();
55595 return SDValue();
55608 return SDValue();
55609 SDValue N00In = N00Elt.getOperand(0);
55610 SDValue N01In = N01Elt.getOperand(0);
55611 SDValue N10In = N10Elt.getOperand(0);
55612 SDValue N11In = N11Elt.getOperand(0);
55623 return SDValue();
55632 return SDValue();
55636 ArrayRef<SDValue> Ops) {
55666 static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1,
55669 return SDValue();
55673 return SDValue();
55690 return SDValue();
55699 SDValue LHS =
55701 SDValue RHS =
55710 static SDValue pushAddIntoCmovOfConsts(SDNode *N, const SDLoc &DL,
55718 auto isSuitableCmov = [](SDValue V) {
55730 SDValue Cmov = N->getOperand(0);
55731 SDValue OtherOp = N->getOperand(1);
55735 return SDValue();
55740 return SDValue();
55743 SDValue FalseOp = Cmov.getOperand(0);
55744 SDValue TrueOp = Cmov.getOperand(1);
55760 SDValue X = OtherOp.getOperand(0), Y = OtherOp.getOperand(1);
55775 static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
55779 SDValue Op0 = N->getOperand(0);
55780 SDValue Op1 = N->getOperand(1);
55783 if (SDValue Select = pushAddIntoCmovOfConsts(N, DL, DAG, Subtarget))
55786 if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget))
55788 if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, DL, VT, Subtarget))
55790 if (SDValue MAdd = combineAddOfPMADDWD(DAG, Op0, Op1, DL, VT))
55794 if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
55804 SDValue Sum =
55821 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0));
55828 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0));
55848 static SDValue combineSubABS(SDNode *N, SelectionDAG &DAG) {
55849 SDValue N0 = N->getOperand(0);
55850 SDValue N1 = N->getOperand(1);
55853 return SDValue();
55857 return SDValue();
55860 SDValue Cond = N1.getOperand(3);
55862 return SDValue();
55866 SDValue NegX = Cond.getValue(0);
55867 SDValue X = Cond.getOperand(1);
55869 SDValue FalseOp = N1.getOperand(0);
55870 SDValue TrueOp = N1.getOperand(1);
55874 return SDValue();
55879 SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp,
55885 static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG) {
55886 SDValue Op0 = N->getOperand(0);
55887 SDValue Op1 = N->getOperand(1);
55898 SDValue SetCC = Op1.getOperand(0);
55903 SDValue NewSetCC = getSETCC(NewCC, SetCC.getOperand(1), DL, DAG);
55909 return SDValue();
55912 static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
55918 return SDValue();
55920 SDValue Sub = N->getOperand(4);
55922 return SDValue();
55924 SDValue SetCC = Sub.getOperand(1);
55927 return SDValue();
55929 SmallVector<SDValue, 5> Ops(N->op_values());
55938 static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
55941 SDValue Op0 = N->getOperand(0);
55942 SDValue Op1 = N->getOperand(1);
55946 auto IsNonOpaqueConstant = [&](SDValue Op) {
55964 SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0),
55966 SDValue NewAdd =
55971 if (SDValue V = combineSubABS(N, DAG))
55975 if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
55991 SDValue ADC = DAG.getNode(X86ISD::ADC, SDLoc(Op1), Op1->getVTList(), Op0,
55997 if (SDValue V = combineXorSubCTLZ(N, DL, DAG, Subtarget))
56000 if (SDValue V = combineAddOrSubToADCOrSBB(N, DL, DAG))
56006 static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
56012 SDValue LHS = N->getOperand(0);
56013 SDValue RHS = N->getOperand(1);
56047 return SDValue();
56075 static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
56076 ArrayRef<SDValue> Ops, SelectionDAG &DAG,
56082 if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
56085 if (llvm::all_of(Ops, [](SDValue Op) {
56090 SDValue Op0 = Ops[0];
56127 SDValue SrcVec = Op0.getOperand(0);
56149 SDValue Src0 = peekThroughBitcasts(Ops[0]);
56150 SDValue Src1 = peekThroughBitcasts(Ops[1]);
56171 if (llvm::all_of(Ops, [Op0](SDValue Op) {
56174 auto ConcatSubOperand = [&](EVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
56175 SmallVector<SDValue> Subs;
56176 for (SDValue SubOp : SubOps)
56184 for (SDValue &Sub : Subs)
56191 auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
56195 SDValue Sub = SubOps[I].getOperand(Op);
56197 SDValue BC = peekThroughBitcasts(Sub);
56209 if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
56236 llvm::all_of(Ops, [Op0](SDValue Op) {
56251 none_of(Ops, [](SDValue Op) {
56276 all_of(Ops, [&Op0](SDValue Op) {
56280 SDValue Res = DAG.getBitcast(FloatVT, ConcatSubOperand(VT, Ops, 0));
56316 SmallVector<SDValue, 2> SubOps;
56326 SDValue Src = concatSubVectors(Ops[0].getOperand(1),
56330 SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
56342 SmallVector<SDValue, 2> SubOps;
56354 SDValue Src0 = concatSubVectors(Ops[0].getOperand(0),
56356 SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
56360 SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
56375 SDValue LHS = concatSubVectors(Ops[0].getOperand(0),
56377 SDValue RHS = concatSubVectors(Ops[1].getOperand(0),
56379 SDValue Res = DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
56394 SDValue LHS = concatSubVectors(Ops[0].getOperand(0),
56396 SDValue RHS = concatSubVectors(Ops[1].getOperand(0),
56422 llvm::all_of(Ops, [](SDValue Op) {
56425 SDValue Res = DAG.getBitcast(MVT::v8i32, ConcatSubOperand(VT, Ops, 0));
56426 SDValue Zero = getZeroVector(MVT::v8i32, Subtarget, DAG, DL);
56444 llvm::all_of(Ops, [Op0](SDValue Op) {
56455 llvm::all_of(Ops, [Op0](SDValue Op) {
56507 SDValue LHS = ConcatSubOperand(VT, Ops, 0);
56508 SDValue RHS = ConcatSubOperand(VT, Ops, 1);
56536 llvm::all_of(Ops, [Op0](SDValue Op) {
56604 llvm::all_of(Ops, [Op0](SDValue Op) {
56624 SDValue Sel =
56671 if (SDValue Ld =
56678 if (all_of(Ops, [](SDValue Op) { return getTargetConstantFromNode(Op); })) {
56694 SDValue CV = DAG.getConstantPool(C, PVT);
56697 SDValue Ld = DAG.getLoad(VT, DL, DAG.getEntryNode(), CV, MPI);
56698 SDValue Sub = extractSubVector(Ld, 0, DAG, DL, Op0.getValueSizeInBits());
56716 if (SDValue BcastLd =
56718 SDValue BcastSrc =
56730 SDValue Res = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
56737 return SDValue();
56740 static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
56746 SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
56764 return SDValue();
56768 if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG,
56773 return SDValue();
56776 static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
56780 return SDValue();
56787 SDValue Vec = N->getOperand(0);
56788 SDValue SubVec = N->getOperand(1);
56820 SDValue Ins = SubVec.getOperand(0);
56833 return SDValue();
56868 SmallVector<SDValue, 2> SubVectorOps;
56870 if (SDValue Fold =
56886 if (all_of(SubVectorOps, [](SDValue SubOp) {
56889 SDValue Op(N, 0);
56890 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
56905 SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
56906 SDValue BcastLd =
56910 DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
56927 return SDValue();
56936 static SDValue narrowExtractedVectorSelect(SDNode *Ext, const SDLoc &DL,
56938 SDValue Sel = Ext->getOperand(0);
56941 return SDValue();
56948 return SDValue();
56952 return SDValue();
56971 return SDValue();
56980 SDValue ExtCond = extract128BitVector(Sel.getOperand(0), ExtIdx, DAG, DL);
56981 SDValue ExtT = extract128BitVector(Sel.getOperand(1), ExtIdx, DAG, DL);
56982 SDValue ExtF = extract128BitVector(Sel.getOperand(2), ExtIdx, DAG, DL);
56983 SDValue NarrowSel = DAG.getSelect(DL, NarrowSelVT, ExtCond, ExtT, ExtF);
56987 static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
57001 return SDValue();
57004 SDValue InVec = N->getOperand(0);
57006 SDValue InVecBC = peekThroughBitcasts(InVec);
57017 auto isConcatenatedNot = [](SDValue V) {
57021 SDValue NotOp = V->getOperand(0);
57027 SDValue Concat = splitVectorIntBinary(InVecBC, DAG, SDLoc(InVecBC));
57034 return SDValue();
57036 if (SDValue V = narrowExtractedVectorSelect(N, DL, DAG))
57059 SDValue NewExt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT,
57084 SmallVector<SDValue, 2> ShuffleInputs;
57094 SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs];
57104 auto IsExtractFree = [](SDValue V) {
57136 SDValue Src = InVec.getOperand(0);
57145 SDValue Ext = InVec.getOperand(0);
57155 SDValue Ext0 = extractSubVector(InVec.getOperand(0), 0, DAG, DL, 128);
57156 SDValue Ext1 = extractSubVector(InVec.getOperand(1), 0, DAG, DL, 128);
57157 SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
57162 SDValue InVecSrc = InVec.getOperand(0);
57164 SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits);
57172 SDValue Ext0 =
57174 SDValue Ext1 =
57182 SDValue Ext0 =
57193 SDValue Ext =
57198 return SDValue();
57201 static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
57203 SDValue Src = N->getOperand(0);
57225 auto IsExt64 = [&DAG](SDValue Op, bool IsZeroExt) {
57227 return SDValue();
57242 return SDValue();
57245 if (SDValue AnyExt = IsExt64(peekThroughOneUseBitcasts(Src), false))
57250 if (SDValue ZeroExt = IsExt64(peekThroughOneUseBitcasts(Src), true))
57264 // Ensure the same SDValue from the SDNode use is being used.
57273 return SDValue(User, 0);
57275 return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
57280 return SDValue();
57284 static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
57287 SDValue LHS = N->getOperand(0);
57288 SDValue RHS = N->getOperand(1);
57302 if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(64), DCI))
57303 return SDValue(N, 0);
57333 return SDValue();
57337 static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
57340 SDValue LHS = N->getOperand(0);
57341 SDValue RHS = N->getOperand(1);
57376 if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
57377 return SDValue(N, 0);
57379 return SDValue();
57382 static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
57386 SDValue In = N->getOperand(0);
57403 SDValue Load = DAG.getExtLoad(
57406 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
57433 SmallVector<SDValue> Elts(Scale * NumElts, DAG.getConstant(0, DL, EltVT));
57441 SDValue Op(N, 0);
57443 if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
57447 return SDValue();
57450 static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
57459 if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
57460 return SDValue(N, 0);
57462 return SDValue();
57468 static SDValue combineFP16_TO_FP(SDNode *N, SelectionDAG &DAG,
57471 return SDValue();
57474 return SDValue();
57478 return SDValue();
57481 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32,
57490 static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
57495 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
57505 return SDValue();
57522 return SDValue();
57525 return SDValue();
57528 return SDValue();
57532 return SDValue();
57536 return SDValue();
57545 SDValue Fill = NumElts == 4 ? DAG.getUNDEF(IntVT)
57547 SmallVector<SDValue, 4> Ops(NumConcats, Fill);
57555 SDValue Cvt, Chain;
57587 static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
57595 return SDValue();
57599 SDValue Ptr = MemIntrin->getBasePtr();
57600 SDValue Chain = MemIntrin->getChain();
57614 SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
57617 return DCI.CombineTo(N, Extract, SDValue(User, 1));
57620 return SDValue();
57623 static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
57626 return SDValue();
57630 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
57635 return SDValue();
57639 SDValue Cvt, Chain;
57648 SDValue Cvt0, Cvt1;
57649 SDValue Op0 = Src.getOperand(0);
57650 SDValue Op1 = Src.getOperand(1);
57655 return SDValue();
57676 return SDValue();
57680 return SDValue();
57690 SDValue Rnd = DAG.getTargetConstant(4, dl, MVT::i32);
57714 static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) {
57715 SDValue Src = N->getOperand(0);
57722 SDValue NewLd = DAG.getLoad(MVT::x86mmx, SDLoc(N), LN->getChain(),
57727 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), NewLd.getValue(1));
57732 return SDValue();
57735 static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
57739 if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBits), DCI))
57740 return SDValue(N, 0);
57742 return SDValue();
57745 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
57938 return SDValue();
58001 SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc &dl,
58002 SDValue Value, SDValue Addr,
58012 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
58042 bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
58054 auto IsFoldableRMW = [](SDValue Load, SDValue Op) {
58065 auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
58088 SDValue N0 = Op.getOperand(0);
58102 SDValue N0 = Op.getOperand(0);
58103 SDValue N1 = Op.getOperand(1);
58515 SDValue X86TargetLowering::LowerAsmOutputForConstraint(
58516 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
58520 return SDValue();
58533 SDValue CC = getSETCC(Cond, Glue, DL, DAG);
58535 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);
58542 void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
58544 std::vector<SDValue> &Ops,
58546 SDValue Result;