Lines Matching +full:depth +full:- +full:wise in llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (each matched line keeps its line number in the file)

1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
87 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
88 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
98 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
102 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
107 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
108 cl::desc("Only use DAG-combiner alias analysis in this"
115 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
120 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
124 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
129 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
133 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
138 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
142 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
148 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
153 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
172 /// This must behave as a stack -- new nodes to process are pushed onto the
194 // BatchAA - Used for DAG load/store alias analysis.
205 for (SDNode *Node : N->users())
222 if (N->use_empty())
238 assert(N->getCombinerWorklistIndex() >= 0 &&
240 // Set to -2 to indicate that we combined the node.
241 N->setCombinerWorklistIndex(-2);
246 /// Call the node-specific routine that folds each particular type of node.
255 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
276 assert(N->getOpcode() != ISD::DELETED_NODE &&
280 // zero-use deletion strategy.
281 if (N->getOpcode() == ISD::HANDLENODE)
284 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
290 if (N->getCombinerWorklistIndex() < 0) {
291 N->setCombinerWorklistIndex(Worklist.size());
301 int WorklistIndex = N->getCombinerWorklistIndex();
302 // If not in the worklist, the index might be -1 or -2 (was combined
310 N->setCombinerWorklistIndex(-1);
414 /// Call the node-specific routine that knows how to fold each
416 /// target-specific DAG combines.
419 // Visitation implementation - Implement dag node combining for different
422 // SDValue.getNode() == 0 - No change was made
423 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
424 // otherwise - N should be replaced by the returned Operand.
690 /// Walk up chain skipping non-aliasing memory nodes,
698 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
750 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
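
// [Editorial illustration, not part of DAGCombiner.cpp] A minimal compile-time
// sketch of the identity behind this fold; the helper names are invented. The
// rewrite is plain distribution, and it stays exact under two's-complement
// wraparound, which is why it is safe on integer DAG nodes:
#include <cstdint>
constexpr uint32_t beforeFold(uint32_t x, uint32_t c1, uint32_t c2) { return (x + c1) * c2; }
constexpr uint32_t afterFold(uint32_t x, uint32_t c1, uint32_t c2) { return x * c2 + c1 * c2; }
static_assert(beforeFold(7, 3, 5) == afterFold(7, 3, 5), "(mul (add x, c1), c2)");
static_assert(beforeFold(0xFFFFFFFFu, 2, 9) == afterFold(0xFFFFFFFFu, 2, 9), "exact under wraparound");
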
841 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
845 /// single-use); if no fold is found, an empty SDValue is returned.
912 //===----------------------------------------------------------------------===//
914 //===----------------------------------------------------------------------===//
917 ((DAGCombiner*)DC)->AddToWorklist(N);
922 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
927 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
932 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
937 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
942 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
945 //===----------------------------------------------------------------------===//
947 //===----------------------------------------------------------------------===//
953 // dead. Make sure to re-visit them and recursively delete dead nodes.
954 for (const SDValue &Op : N->ops())
958 if (Op->hasOneUse() || Op->getNumValues() > 1)
1010 /// Return true if this is a SetCC-equivalent operation with only one use.
1015 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1052 return !(Const->isOpaque() && NoOpaques);
1056 for (const SDValue &Op : N->op_values()) {
1060 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1061 (Const->isOpaque() && NoOpaques))
1067 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed with
1079 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1080 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1091 // (load/store (add, (add, x, offset1), offset2)) ->
1094 // (load/store (add, (add, x, y), offset2)) ->
1116 ScalableOffset = -ScalableOffset;
1117 if (all_of(N->users(), [&](SDNode *Node) {
1119 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1123 EVT VT = LoadStore->getMemoryVT();
1124 unsigned AS = LoadStore->getAddressSpace();
1141 const APInt &C2APIntVal = C2->getAPIntValue();
1149 const APInt &C1APIntVal = C1->getAPIntValue();
1155 for (SDNode *Node : N->users()) {
1163 EVT VT = LoadStore->getMemoryVT();
1164 unsigned AS = LoadStore->getAddressSpace();
1177 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1180 for (SDNode *Node : N->users()) {
1190 EVT VT = LoadStore->getMemoryVT();
1191 unsigned AS = LoadStore->getAddressSpace();
1217 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1222 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1225 N0->getFlags().hasDisjoint());
1231 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1240 // (N00 & N01) & N00 --> N00 & N01
1241 // (N00 & N01) & N01 --> N00 & N01
1242 // (N00 | N01) | N00 --> N00 | N01
1243 // (N00 | N01) | N01 --> N00 | N01
1248 // (N00 ^ N01) ^ N00 --> N01
1251 // (N00 ^ N01) ^ N01 --> N00
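
// [Editorial illustration, not part of DAGCombiner.cpp] The and/or cases rely
// on absorption (x & x == x, x | x == x); the xor cases rely on xor being its
// own inverse, so the repeated operand cancels. A quick compile-time check:
static_assert(((0xA5u & 0x3Cu) & 0xA5u) == (0xA5u & 0x3Cu), "(N00 & N01) & N00 --> N00 & N01");
static_assert(((0xA5u ^ 0x3Cu) ^ 0xA5u) == 0x3Cu, "(N00 ^ N01) ^ N00 --> N01");
static_assert(((0xA5u ^ 0x3Cu) ^ 0x3Cu) == 0xA5u, "(N00 ^ N01) ^ N01 --> N00");
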
1285 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1286 N01->getOpcode() == ISD::SETCC) {
1287 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1288 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1289 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1311 // Floating-point reassociation is not allowed without loose FP math.
1324 // Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1332 N0->hasOneUse() && N1->hasOneUse() &&
1345 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1347 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1349 dbgs() << " and " << NumTo - 1 << " other values\n");
1352 N->getValueType(i) == To[i].getValueType()) &&
1368 if (N->use_empty())
1429 EVT VT = Load->getValueType(0);
1432 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1447 EVT MemVT = LD->getMemoryVT();
1449 : LD->getExtensionType();
1452 LD->getChain(), LD->getBasePtr(),
1453 MemVT, LD->getMemOperand());
1552 Replace0 &= !N0->hasOneUse();
1553 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1560 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1649 // fold (aext (aext x)) -> (aext x)
1650 // fold (aext (zext x)) -> (zext x)
1651 // fold (aext (sext x)) -> (sext x)
1684 EVT MemVT = LD->getMemoryVT();
1686 : LD->getExtensionType();
1688 LD->getChain(), LD->getBasePtr(),
1689 MemVT, LD->getMemOperand());
1692 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1713 if (!N->use_empty())
1723 if (N->use_empty()) {
1724 for (const SDValue &ChildN : N->op_values())
1736 //===----------------------------------------------------------------------===//
1738 //===----------------------------------------------------------------------===//
1773 // If this combine is running after legalizing the DAG, re-legalize any
1786 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1792 for (const SDValue &ChildN : N->op_values())
1813 assert(N->getOpcode() != ISD::DELETED_NODE &&
1819 if (N->getNumValues() == RV->getNumValues())
1822 assert(N->getValueType(0) == RV.getValueType() &&
1823 N->getNumValues() == 1 && "Type mismatch");
1829 // out), because re-visiting the EntryToken and its users will not uncover
1848 // clang-format off
1849 switch (N->getOpcode()) {
2015 // clang-format on
2027 // If nothing happened, try a target-specific DAG combine.
2029 assert(N->getOpcode() != ISD::DELETED_NODE &&
2032 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2033 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2045 switch (N->getOpcode()) {
2074 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2075 SDValue N0 = N->getOperand(0);
2076 SDValue N1 = N->getOperand(1);
2081 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2082 N->getFlags());
2094 if (unsigned NumOps = N->getNumOperands()) {
2095 if (N->getOperand(0).getValueType() == MVT::Other)
2096 return N->getOperand(0);
2097 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2098 return N->getOperand(NumOps-1);
2099 for (unsigned i = 1; i < NumOps-1; ++i)
2100 if (N->getOperand(i).getValueType() == MVT::Other)
2101 return N->getOperand(i);
2107 SDValue Operand = N->getOperand(0);
2122 if (N->getNumOperands() == 2) {
2123 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2124 return N->getOperand(0);
2125 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2126 return N->getOperand(1);
2134 if (N->getNumOperands() > TokenFactorInlineLimit)
2140 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2141 AddToWorklist(*(N->user_begin()));
2168 for (const SDValue &Op : TF->op_values()) {
2196 // Re-visit inlined Token Factors, to clean them up in case they have been
2202 // by walking up chains breadth-first, stopping when we've seen
2230 // Re-mark worklist from OrigOpNumber to OpNumber
2238 NumLeftToConsider--;
2255 switch (CurNode->getOpcode()) {
2264 for (const SDValue &Op : CurNode->op_values())
2271 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2275 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2278 OpWorkCount[CurOpNumber]--;
2280 NumLeftToConsider--;
2318 SmallVector<SDValue, 8> Ops(N->ops());
2320 } while (!N->use_empty());
2329 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2332 // isTruncateOf - If N is a truncate of some other value, return true, record
2338 if (N->getOpcode() == ISD::TRUNCATE) {
2339 Op = N->getOperand(0);
2341 if (N->getFlags().hasNoUnsignedWrap())
2363 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2365 VT = LD->getMemoryVT();
2366 AS = LD->getAddressSpace();
2368 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2370 VT = ST->getMemoryVT();
2371 AS = ST->getAddressSpace();
2373 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2375 VT = LD->getMemoryVT();
2376 AS = LD->getAddressSpace();
2378 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2380 VT = ST->getMemoryVT();
2381 AS = ST->getAddressSpace();
2387 if (N->getOpcode() == ISD::ADD) {
2389 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2391 // [reg +/- imm]
2392 AM.BaseOffs = Offset->getSExtValue();
2394 // [reg +/- reg]
2396 } else if (N->getOpcode() == ISD::SUB) {
2398 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2400 // [reg +/- imm]
2401 AM.BaseOffs = -Offset->getSExtValue();
2403 // [reg +/- reg]
2414 /// with an identity constant. Codegen improves if we re-use the variable
2420 // is only valid as operand 1 of a non-commutative binop.
2421 SDValue N0 = N->getOperand(0);
2422 SDValue N1 = N->getOperand(1);
2435 unsigned Opcode = N->getOpcode();
2436 EVT VT = N->getValueType(0);
2442 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2444 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2446 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2449 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2450 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2452 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2460 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2464 auto BinOpcode = BO->getOpcode();
2465 EVT VT = BO->getValueType(0);
2470 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2479 SDValue Sel = BO->getOperand(0);
2482 Sel = BO->getOperand(1);
2510 // The exception is "and" and "or" with either 0 or -1 in which case we can
2512 // and (select Cond, 0, -1), X --> select Cond, 0, X
2513 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2519 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2542 // We have a select-of-constants followed by a binary operator with a
2544 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2558 SelectOp->setFlags(BO->getFlags());
2564 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2570 bool IsAdd = N->getOpcode() == ISD::ADD;
2571 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2572 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2588 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2589 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2592 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2593 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2597 // Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2599 SDValue N0 = N->getOperand(0);
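
// [Editorial illustration, not part of DAGCombiner.cpp] A sketch of the
// identity this matcher looks for (helper name invented). Since
// a + b == 2*(a | b) - (a ^ b), the ceiling average can be computed as
// (a | b) - ((a ^ b) >> 1) without ever forming the possibly-overflowing a + b:
#include <cstdint>
constexpr uint8_t avgceilSketch(uint8_t a, uint8_t b) {
  return (a | b) - ((a ^ b) >> 1);
}
static_assert(avgceilSketch(1, 2) == 2, "ceil(3 / 2) == 2");
static_assert(avgceilSketch(255, 254) == 255, "no intermediate overflow");
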
2616 /// Try to fold a 'not' of a shifted sign-bit, together with an add/sub of a constant operand, into
2620 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2625 bool IsAdd = N->getOpcode() == ISD::ADD;
2626 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2627 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2637 // The shift must be moving the sign bit to the least-significant-bit.
2641 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2645 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2646 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2668 SDValue N0 = N->getOperand(0);
2669 SDValue N1 = N->getOperand(1);
2673 // fold (add x, undef) -> undef
2679 // fold (add c1, c2) -> c1+c2
2696 // fold (add x, 0) -> x, vector edition
2701 // fold (add x, 0) -> x
2709 // fold ((A-c1)+c2) -> (A+(c2-c1))
2713 // fold ((c1-A)+c2) -> (c1+c2)-A
2718 // add (sext i1 X), 1 -> zext (not i1 X)
2720 // add (zext i1 X), -1 -> sext (not i1 X)
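
// [Editorial illustration, not part of DAGCombiner.cpp] For an i1 value, sext
// produces 0/-1 and zext produces 0/1, so both folds are just the two-entry
// truth table below (modeled with bool; helper names invented):
constexpr int sextI1(bool b) { return b ? -1 : 0; }
constexpr int zextI1(bool b) { return b ? 1 : 0; }
static_assert(sextI1(true) + 1 == zextI1(!true) && sextI1(false) + 1 == zextI1(!false),
              "add (sext i1 X), 1 -> zext (not i1 X)");
static_assert(zextI1(true) - 1 == sextI1(!true) && zextI1(false) - 1 == sextI1(!false),
              "add (zext i1 X), -1 -> sext (not i1 X)");
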
2734 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2736 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2749 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2752 // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
2754 // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
2780 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2788 // fold ((0-A) + B) -> B-A
2792 // fold (A + (0-B)) -> A-B
2796 // fold (A+(B-A)) -> B
2800 // fold ((B-A)+A) -> B
2804 // fold ((A-B)+(C-A)) -> (C-B)
2809 // fold ((A-B)+(B-C)) -> (A-C)
2814 // fold (A+(B-(A+C))) to (B-C)
2815 // fold (A+(B-(C+A))) to (B-C)
2819 // fold (A+((B-A)+or-C)) to (B+or-C)
2825 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2833 // fold (add (umax X, C), -C) --> (usubsat X, C)
2837 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
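
// [Editorial illustration, not part of DAGCombiner.cpp] umax followed by
// subtracting the same constant clamps at zero, which is exactly unsigned
// saturating subtraction (helper name invented):
#include <algorithm>
#include <cstdint>
constexpr uint8_t usubsatSketch(uint8_t x, uint8_t c) { return x > c ? x - c : 0; }
static_assert(uint8_t(std::max<uint8_t>(10, 40) - 40) == usubsatSketch(10, 40), "clamps to 0");
static_assert(uint8_t(std::max<uint8_t>(100, 40) - 40) == usubsatSketch(100, 40), "plain subtract");
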
2849 // fold (add (xor a, -1), 1) -> (sub 0, a)
2854 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2873 // sub y, (xor x, -1)
2877 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2878 !N->getFlags().hasNoSignedWrap()))) {
2884 // (x - y) + -1 -> add (xor y, -1), x
2891 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2899 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2903 if (N->getFlags().hasNoUnsignedWrap() &&
2904 N0->getFlags().hasNoUnsignedWrap() &&
2905 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2907 if (N->getFlags().hasNoSignedWrap() &&
2908 N0->getFlags().hasNoSignedWrap() &&
2909 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2916 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2924 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2930 if (N->getFlags().hasNoUnsignedWrap() &&
2931 N0->getFlags().hasNoUnsignedWrap() &&
2932 OMul->getFlags().hasNoUnsignedWrap() &&
2933 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2935 if (N->getFlags().hasNoSignedWrap() &&
2936 N0->getFlags().hasNoSignedWrap() &&
2937 OMul->getFlags().hasNoSignedWrap() &&
2938 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2946 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2962 SDValue N0 = N->getOperand(0);
2981 SDValue N0 = N->getOperand(0);
2982 SDValue N1 = N->getOperand(1);
2999 // fold (a+b) -> (a|b) iff a and b share no bits.
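
// [Editorial illustration, not part of DAGCombiner.cpp] With no common bits
// set there are no carries, so addition and bitwise-or agree; on the DAG side
// this precondition is what SelectionDAG::haveNoCommonBitsSet checks:
static_assert((0xF0u & 0x0Fu) == 0 && (0xF0u + 0x0Fu) == (0xF0u | 0x0Fu),
              "disjoint bits: a + b == a | b");
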
3006 const APInt &C0 = N0->getConstantOperandAPInt(0);
3007 const APInt &C1 = N1->getConstantOperandAPInt(0);
3011 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3015 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3016 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3024 const APInt &C0 = N0->getConstantOperandAPInt(0);
3025 const APInt &C1 = N1->getConstantOperandAPInt(0);
3034 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3035 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3045 unsigned Opcode = N->getOpcode();
3046 SDValue N0 = N->getOperand(0);
3047 SDValue N1 = N->getOperand(1);
3052 // fold (add_sat x, undef) -> -1
3056 // fold (add_sat c1, c2) -> c3
3070 // fold (add_sat x, 0) -> x, vector edition
3075 // fold (add_sat x, 0) -> x
3120 EVT VT = V->getValueType(0);
3136 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3143 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3157 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3158 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3168 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3180 // sub y, (xor x, -1)
3184 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3185 !N0->getFlags().hasNoSignedWrap()))) {
3191 // Hoist one-use subtraction by non-opaque constant:
3192 // (x - C) + y -> (x + y) - C
3193 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3198 // Hoist one-use subtraction from non-opaque constant:
3199 // (C - x) + y -> (y - x) + C
3206 // add (mul x, C), x -> mul x, C+1
3216 // rather than 'add 0/-1' (the zext should get folded).
3217 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3225 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3228 if (TN->getVT() == MVT::i1) {
3235 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3238 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3241 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3252 SDValue N0 = N->getOperand(0);
3253 SDValue N1 = N->getOperand(1);
3258 if (!N->hasAnyUseOfValue(1))
3266 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3268 // fold (addc x, 0) -> x + no carry out
3305 SDValue N0 = N->getOperand(0);
3306 SDValue N1 = N->getOperand(1);
3308 bool IsSigned = (ISD::SADDO == N->getOpcode());
3310 EVT CarryVT = N->getValueType(1);
3314 if (!N->hasAnyUseOfValue(1))
3321 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3323 // fold (addo x, 0) -> x + no carry out
3333 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3335 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3338 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3340 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3343 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3361 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3367 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3371 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3374 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3381 SDValue N0 = N->getOperand(0);
3382 SDValue N1 = N->getOperand(1);
3383 SDValue CarryIn = N->getOperand(2);
3389 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3392 // fold (adde x, y, false) -> (addc x, y)
3394 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3400 SDValue N0 = N->getOperand(0);
3401 SDValue N1 = N->getOperand(1);
3402 SDValue CarryIn = N->getOperand(2);
3409 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3411 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3414 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3415 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3418 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3442 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3491 EVT VT = Carry0->getValueType(1);
3502 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3504 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3579 EVT CarryOutType = N->getValueType(0);
3586 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3611 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3612 Carry1->getValueType(0));
3614 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3620 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3624 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3631 if (N->getOpcode() == ISD::AND)
3638 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3643 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3646 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3650 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3656 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3657 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3678 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3681 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3689 SDValue N0 = N->getOperand(0);
3690 SDValue N1 = N->getOperand(1);
3691 SDValue CarryIn = N->getOperand(2);
3698 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3700 // fold (saddo_carry x, y, false) -> (saddo x, y)
3703 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3704 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3727 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3744 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3747 if (N->getOpcode() != ISD::SUB ||
3751 EVT SubVT = N->getValueType(0);
3752 SDValue Op0 = N->getOperand(0);
3753 SDValue Op1 = N->getOperand(1);
3755 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3775 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3801 // if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3802 // -->
3814 // and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3816 // -->
3822 SDValue N0 = N->getOperand(0);
3842 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3851 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3877 SDValue N0 = N->getOperand(0);
3878 SDValue N1 = N->getOperand(1);
3884 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3885 return N->getOperand(0);
3892 // fold (sub x, x) -> 0
3897 // fold (sub c1, c2) -> c3
3906 // fold (sub x, 0) -> x, vector edition
3914 // fold (sub x, c) -> (add x, -c)
3917 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3920 // Right-shifting everything out but the sign bit followed by negation is
3922 // -(X >>u 31) -> (X >>s 31)
3923 // -(X >>s 31) -> (X >>u 31)
3924 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3926 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3927 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
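
// [Editorial illustration, not part of DAGCombiner.cpp] Shifting out all but
// the sign bit yields 0/1 (srl) or 0/-1 (sra); negation maps one onto the
// other. Sketch for 32 bits, assuming two's-complement conversions and
// arithmetic right shift on signed values (guaranteed since C++20):
#include <cstdint>
constexpr uint32_t srl31(uint32_t x) { return x >> 31; }
constexpr uint32_t sra31(uint32_t x) { return uint32_t(int32_t(x) >> 31); }
static_assert(uint32_t(-srl31(0x80000000u)) == sra31(0x80000000u), "-(X >>u 31) == X >>s 31");
static_assert(uint32_t(-sra31(0x80000000u)) == srl31(0x80000000u), "-(X >>s 31) == X >>u 31");
static_assert(srl31(5u) == 0 && sra31(5u) == 0, "both zero for non-negative X");
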
3933 // 0 - X --> 0 if the sub is NUW.
3934 if (N->getFlags().hasNoUnsignedWrap())
3940 if (N->getFlags().hasNoSignedWrap())
3943 // 0 - X --> X if X is 0 or the minimum signed value.
3947 // Convert 0 - abs(x).
3954 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
3956 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
3965 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
3970 // Fold neg(splat(neg(x))) -> splat(x)
3979 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3983 // fold (A - (0-B)) -> A+B
3987 // fold A-(A-B) -> B
3991 // fold (A+B)-A -> B
3995 // fold (A+B)-B -> A
3999 // fold (A+C1)-C2 -> A+(C1-C2)
4006 // fold C2-(A+C1) -> (C2-C1)-A
4013 // fold (A-C1)-C2 -> A-(C1+C2)
4020 // fold (c1-A)-c2 -> (c1-c2)-A
4029 // fold ((A+(B+C))-B) -> A+C
4033 // fold ((A+(B-C))-B) -> A-C
4037 // fold ((A-(B-C))-C) -> A-B
4041 // fold (A-(B-C)) -> A+(C-B)
4046 // A - (A & B) -> A & (~B)
4051 // fold (A - (-B * C)) -> (A + (B * C))
4078 // (A - B) - 1 -> add (xor B, -1), A
4083 // sub y, (xor x, -1)
4091 // Hoist one-use addition by non-opaque constant:
4092 // (x + C) - y -> (x - y) + C
4099 // y - (x + C) -> (y - x) - C
4105 // (x - C) - y -> (x - y) - C
4106 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4112 // (C - x) - y -> C - (x + y)
4119 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4121 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4130 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4132 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
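
// [Editorial illustration, not part of DAGCombiner.cpp] The matched pattern is
// the classic branchless abs: B is a mask of copies of the sign bit, and
// (A ^ B) - B conditionally negates A. Sketch assuming arithmetic right shift
// on signed types (guaranteed since C++20):
#include <cstdint>
constexpr int32_t absSketch(int32_t a) {
  int32_t b = a >> 31;   // B = sra(A, size(A)-1): 0 for a >= 0, -1 for a < 0
  return (a ^ b) - b;    // a < 0: flip all bits, then add 1
}
static_assert(absSketch(-42) == 42 && absSketch(42) == 42 && absSketch(0) == 0, "abs A");
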
4139 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4141 if (GA->getGlobal() == GB->getGlobal())
4142 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4146 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4149 if (TN->getVT() == MVT::i1) {
4156 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4159 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4162 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4164 APInt NewStep = -N1.getConstantOperandAPInt(0);
4170 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4174 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4182 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4185 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4189 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4192 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4196 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4208 // sub C0, X --> xor X, C0
4210 if (!C0->isOpaque()) {
4211 const APInt &C0Val = C0->getAPIntValue();
4213 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4218 // smax(a,b) - smin(a,b) --> abds(a,b)
4224 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4230 // umax(a,b) - umin(a,b) --> abdu(a,b)
4236 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
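
// [Editorial illustration, not part of DAGCombiner.cpp] max minus min of the
// same pair is their absolute difference, which is what the ABDS/ABDU nodes
// compute:
#include <algorithm>
static_assert(std::max(3, -7) - std::min(3, -7) == 10, "abds(3, -7) == 10");
static_assert(std::max(3u, 7u) - std::min(3u, 7u) == 4u, "abdu(3, 7) == 4");
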
4246 unsigned Opcode = N->getOpcode();
4247 SDValue N0 = N->getOperand(0);
4248 SDValue N1 = N->getOperand(1);
4253 // fold (sub_sat x, undef) -> 0
4257 // fold (sub_sat x, x) -> 0
4261 // fold (sub_sat c1, c2) -> c3
4270 // fold (sub_sat x, 0) -> x, vector edition
4275 // fold (sub_sat x, 0) -> x
4287 SDValue N0 = N->getOperand(0);
4288 SDValue N1 = N->getOperand(1);
4293 if (!N->hasAnyUseOfValue(1))
4297 // fold (subc x, x) -> 0 + no borrow
4302 // fold (subc x, 0) -> x + no borrow
4306 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4315 SDValue N0 = N->getOperand(0);
4316 SDValue N1 = N->getOperand(1);
4318 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4320 EVT CarryVT = N->getValueType(1);
4324 if (!N->hasAnyUseOfValue(1))
4328 // fold (subo x, x) -> 0 + no borrow
4333 // fold (subo x, c) -> (addo x, -c)
4335 if (IsSigned && !N1C->isMinSignedValue())
4336 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4337 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4339 // fold (subo x, 0) -> x + no borrow
4348 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4357 SDValue N0 = N->getOperand(0);
4358 SDValue N1 = N->getOperand(1);
4359 SDValue CarryIn = N->getOperand(2);
4361 // fold (sube x, y, false) -> (subc x, y)
4363 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4369 SDValue N0 = N->getOperand(0);
4370 SDValue N1 = N->getOperand(1);
4371 SDValue CarryIn = N->getOperand(2);
4373 // fold (usubo_carry x, y, false) -> (usubo x, y)
4376 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4377 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4384 SDValue N0 = N->getOperand(0);
4385 SDValue N1 = N->getOperand(1);
4386 SDValue CarryIn = N->getOperand(2);
4388 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4391 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4392 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4401 SDValue N0 = N->getOperand(0);
4402 SDValue N1 = N->getOperand(1);
4403 SDValue Scale = N->getOperand(2);
4406 // fold (mulfix x, undef, scale) -> 0
4413 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4415 // fold (mulfix x, 0, scale) -> 0
4423 SDValue N0 = N->getOperand(0);
4424 SDValue N1 = N->getOperand(1);
4431 // fold (mul x, undef) -> 0
4435 // fold (mul c1, c2) -> c1*c2
4461 ConstValue1 = N1->getAsAPIntVal();
4462 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4466 // fold (mul x, 0) -> 0
4470 // fold (mul x, 1) -> x
4478 // fold (mul x, -1) -> 0-x
4482 // fold (mul x, (1 << c)) -> x << c
4492 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4494 unsigned Log2Val = (-ConstValue1).logBase2();
4497 // single-use add), we should put the negate there.
4505 // hi result is in use in case we hit this mid-legalization.
4512 if (LoHi->hasAnyUseOfValue(1))
4515 if (LoHi->hasAnyUseOfValue(1))
4522 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4523 // mul x, (2^N + 1) --> add (shl x, N), x
4524 // mul x, (2^N - 1) --> sub (shl x, N), x
4525 // Examples: x * 33 --> (x << 5) + x
4526 // x * 15 --> (x << 4) - x
4527 // x * -33 --> -((x << 5) + x)
4528 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4529 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4530 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4531 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4532 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4533 // x * 0xf800 --> (x << 16) - (x << 11)
4534 // x * -0x8800 --> -((x << 15) + (x << 11))
4535 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4546 if ((MulC - 1).isPowerOf2())
4553 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4556 "multiply-by-constant generated out of bounds shift");
4570 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4577 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4597 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4611 const APInt &C1 = NC1->getAPIntValue();
4624 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4629 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4634 // Fold ((mul x, 0/undef) -> 0,
4635 // (mul x, 1) -> x)
4636 // -> and(x, mask)
4643 if (!V || V->isZero()) {
4648 return V->isOne();
4667 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4670 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4677 // Simplify the operands using demanded-bits information.
4688 EVT NodeType = Node->getValueType(0);
4705 if (Node->use_empty())
4708 unsigned Opcode = Node->getOpcode();
4712 // DivMod can still be lowered for non-legal types by using lib calls.
4713 EVT VT = Node->getValueType(0);
4738 SDValue Op0 = Node->getOperand(0);
4739 SDValue Op1 = Node->getOperand(1);
4741 for (SDNode *User : Op0->users()) {
4742 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4743 User->use_empty())
4746 // otherwise, the DIVREM may get target-legalized into something
4747 // target-specific that we won't be able to recognize.
4748 unsigned UserOpc = User->getOpcode();
4750 User->getOperand(0) == Op0 &&
4751 User->getOperand(1) == Op1) {
4773 SDValue N0 = N->getOperand(0);
4774 SDValue N1 = N->getOperand(1);
4775 EVT VT = N->getValueType(0);
4778 unsigned Opc = N->getOpcode();
4782 // X / undef -> undef
4783 // X % undef -> undef
4784 // X / 0 -> undef
4785 // X % 0 -> undef
4790 // undef / X -> 0
4791 // undef % X -> 0
4795 // 0 / X -> 0
4796 // 0 % X -> 0
4798 if (N0C && N0C->isZero())
4801 // X / X -> 1
4802 // X % X -> 0
4806 // X / 1 -> X
4807 // X % 1 -> 0
4808 // If this is a boolean op (single-bit element type), we can't have
4809 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4810 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4812 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4819 SDValue N0 = N->getOperand(0);
4820 SDValue N1 = N->getOperand(1);
4821 EVT VT = N->getValueType(0);
4825 // fold (sdiv c1, c2) -> c1/c2
4834 // fold (sdiv X, -1) -> 0-X
4836 if (N1C && N1C->isAllOnes())
4839 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4840 if (N1C && N1C->isMinSignedValue())
4852 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4858 // (Dividend - (Quotient * Divisor).
4859 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4870 // sdiv, srem -> sdivrem
4874 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4882 // Helper for determining whether a value is a power-of-2 constant scalar or a
4885 if (C->isZero() || C->isOpaque())
4887 if (C->getAPIntValue().isPowerOf2())
4889 if (C->getAPIntValue().isNegatedPowerOf2())
4899 EVT VT = N->getValueType(0);
4903 // fold (sdiv X, pow2) -> simple ops after legalize
4905 // better results in that case. The target-specific lowering should learn how
4907 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4908 // Target-specific implementation of sdiv x, pow2.
4923 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4926 // Add (N0 < 0) ? abs2 - 1 : 0;
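
// [Editorial illustration, not part of DAGCombiner.cpp] The sequence built
// here, specialized to a divisor of 8 (helper name invented): add abs2 - 1
// only when the dividend is negative, then shift right arithmetically.
// Assumes arithmetic right shift on signed types (guaranteed since C++20):
#include <cstdint>
constexpr int32_t sdivBy8(int32_t x) {
  int32_t bias = (x >> 31) & 7;  // (N0 < 0) ? abs2 - 1 : 0
  return (x + bias) >> 3;        // sra by log2(abs2)
}
static_assert(sdivBy8(20) == 20 / 8 && sdivBy8(-20) == -20 / 8, "rounds toward zero");
static_assert(sdivBy8(-1) == 0, "small negative dividends round up to 0");
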
4934 // Special case: (sdiv X, 1) -> X
4935 // Special Case: (sdiv X, -1) -> 0-X
4948 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4956 // trade-offs.
4959 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4967 SDValue N0 = N->getOperand(0);
4968 SDValue N1 = N->getOperand(1);
4969 EVT VT = N->getValueType(0);
4973 // fold (udiv c1, c2) -> c1/c2
4982 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4984 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4998 // (Dividend - (Quotient * Divisor).
4999 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5010 // sdiv, srem -> sdivrem
5014 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5018 // Simplify the operands using demanded-bits information.
5029 EVT VT = N->getValueType(0);
5031 // fold (udiv x, (1 << c)) -> x >>u c
5043 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5060 // fold (udiv x, c) -> alternate
5063 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5071 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5072 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5073 // Target-specific implementation of srem x, pow2.
5082 unsigned Opcode = N->getOpcode();
5083 SDValue N0 = N->getOperand(0);
5084 SDValue N1 = N->getOperand(1);
5085 EVT VT = N->getValueType(0);
5091 // fold (rem c1, c2) -> c1%c2
5095 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5112 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5117 // fold (urem x, pow2) -> (and x, pow2-1)
5123 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5124 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
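
// [Editorial illustration, not part of DAGCombiner.cpp] For a power-of-two
// divisor the remainder is just the low bits, so urem turns into a mask (the
// shl/lshr variants above mask with the shifted power of two minus one):
static_assert(1234u % 16u == (1234u & 15u), "urem x, pow2 -> and x, pow2-1");
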
5138 // If X/C can be simplified by the division-by-constant logic, lower
5139 // X%C to the equivalent of X-X/C*C.
5140 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5157 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5168 // sdiv, srem -> sdivrem
5176 SDValue N0 = N->getOperand(0);
5177 SDValue N1 = N->getOperand(1);
5178 EVT VT = N->getValueType(0);
5188 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5194 // fold (mulhs x, 0) -> 0
5200 // fold (mulhs x, 0) -> 0
5204 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5208 DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5210 // fold (mulhs x, undef) -> 0
5235 SDValue N0 = N->getOperand(0);
5236 SDValue N1 = N->getOperand(1);
5237 EVT VT = N->getValueType(0);
5247 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5253 // fold (mulhu x, 0) -> 0
5259 // fold (mulhu x, 0) -> 0
5263 // fold (mulhu x, 1) -> 0
5267 // fold (mulhu x, undef) -> 0
5271 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5301 // Simplify the operands using demanded-bits information.
5311 unsigned Opcode = N->getOpcode();
5312 SDValue N0 = N->getOperand(0);
5313 SDValue N1 = N->getOperand(1);
5314 EVT VT = N->getValueType(0);
5325 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5331 // fold (avg x, undef) -> x
5337 // fold (avg x, x) --> x
5341 // fold (avgfloor x, 0) -> x >> 1
5350 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5351 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5367 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5368 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
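
// [Editorial illustration, not part of DAGCombiner.cpp] The rewrite is the
// identity ceil((x + (y - 1)) / 2) == floor((x + y) / 2) for y >= 1, checked
// here with invented helpers over plain unsigned arithmetic:
constexpr unsigned avgflooruSketch(unsigned x, unsigned y) { return (x + y) / 2; }
constexpr unsigned avgceiluSketch(unsigned x, unsigned y) { return (x + y + 1) / 2; }
static_assert(avgflooruSketch(10, 7) == avgceiluSketch(10, 7 - 1), "odd sum");
static_assert(avgflooruSketch(10, 8) == avgceiluSketch(10, 8 - 1), "even sum");
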
5382 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5383 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5395 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5398 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5407 unsigned Opcode = N->getOpcode();
5408 SDValue N0 = N->getOperand(0);
5409 SDValue N1 = N->getOperand(1);
5410 EVT VT = N->getValueType(0);
5420 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5426 // fold (abd x, undef) -> 0
5430 // fold (abd x, x) -> 0
5436 // fold (abds x, 0) -> abs x
5441 // fold (abdu x, 0) -> x
5445 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5459 bool HiExists = N->hasAnyUseOfValue(1);
5461 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5462 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5467 bool LoExists = N->hasAnyUseOfValue(0);
5469 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5470 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5480 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5490 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5506 SDValue N0 = N->getOperand(0);
5507 SDValue N1 = N->getOperand(1);
5508 EVT VT = N->getValueType(0);
5513 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5518 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5547 SDValue N0 = N->getOperand(0);
5548 SDValue N1 = N->getOperand(1);
5549 EVT VT = N->getValueType(0);
5554 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5559 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5561 // (umul_lohi N0, 0) -> (0, 0)
5567 // (umul_lohi N0, 1) -> (N0, 0)
5597 SDValue N0 = N->getOperand(0);
5598 SDValue N1 = N->getOperand(1);
5600 bool IsSigned = (ISD::SMULO == N->getOpcode());
5602 EVT CarryVT = N->getValueType(1);
5614 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5615 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5623 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5625 // fold (mulo x, 0) -> 0 + no carry out
5630 // (mulo x, 2) -> (addo x, x)
5632 if (N1C && N1C->getAPIntValue() == 2 &&
5635 N->getVTList(), N0, N0);
5654 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5672 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5673 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5692 const fltSemantics &Semantics = InputTy->getFltSemantics();
5718 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5728 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5740 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5743 const APInt &MinC = MinCOp->getAPIntValue();
5744 const APInt &MaxC = MaxCOp->getAPIntValue();
5746 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5780 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5786 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5797 const APInt &C1 = N1C->getAPIntValue();
5798 const APInt &C3 = N3C->getAPIntValue();
5820 SDValue N0 = N->getOperand(0);
5821 SDValue N1 = N->getOperand(1);
5823 unsigned Opcode = N->getOpcode();
5830 // If the operands are the same, this is a no-op.
5845 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5876 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5895 // Simplify the operands using demanded-bits information.
5905 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5907 unsigned LogicOpcode = N->getOpcode();
5919 // Handle size-changing casts (or sign_extend_inreg).
5945 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5952 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5975 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5986 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5996 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5997 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6010 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6029 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6032 // The type-legalizer generates this pattern when loading illegal
6050 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6051 !SVN0->getMask().equals(SVN1->getMask()))
6060 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6064 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6073 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6077 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6098 // If we're here post-legalization or the logic op type is not i1, the logic
6109 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6110 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6125 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6126 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6127 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6128 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6144 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6145 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6146 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6147 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6156 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
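
// [Editorial illustration, not part of DAGCombiner.cpp] The add-then-unsigned
// compare trick above: X + 1 lands in {0, 1} exactly when X is in {-1, 0}, so
// a single setuge replaces both setnes:
#include <cstdint>
constexpr bool twoSetnes(int8_t x) { return x != 0 && x != -1; }
constexpr bool oneSetuge(int8_t x) { return uint8_t(x + 1) >= 2; }
static_assert(twoSetnes(-2) == oneSetuge(-2) && twoSetnes(-1) == oneSetuge(-1) &&
              twoSetnes(0) == oneSetuge(0) && twoSetnes(1) == oneSetuge(1), "");
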
6172 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6173 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
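
// [Editorial illustration, not part of DAGCombiner.cpp] xor is zero exactly on
// equal inputs, so or-ing two xors tests both equalities with one compare
// against zero:
static_assert((((3 ^ 3) | (7 ^ 7)) == 0) == (3 == 3 && 7 == 7), "both pairs equal");
static_assert((((3 ^ 4) | (7 ^ 7)) == 0) == (3 == 4 && 7 == 7), "one pair differs");
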
6184 // Match a shared variable operand and 2 non-opaque constant operands.
6188 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6190 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6191 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6194 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6195 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6214 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6215 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6239 // FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6299 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6303 SDValue LHS = LogicOp->getOperand(0);
6304 SDValue RHS = LogicOp->getOperand(1);
6305 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6306 !LHS->hasOneUse() || !RHS->hasOneUse())
6313 SDValue LHS0 = LHS->getOperand(0);
6314 SDValue RHS0 = RHS->getOperand(0);
6315 SDValue LHS1 = LHS->getOperand(1);
6316 SDValue RHS1 = RHS->getOperand(1);
6321 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6322 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6323 EVT VT = LogicOp->getValueType(0);
6328 // compare against the same value. Replace the and/or-cmp-cmp sequence with a
6329 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6330 // sequence is replaced with a min-cmp sequence:
6331 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6332 // and the and-cmp-cmp sequence is replaced with a max-cmp sequence:
6333 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
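
// [Editorial illustration, not part of DAGCombiner.cpp] The two rewrites
// above, checked on scalars: an or of compares against a shared bound is one
// compare of the min, and an and is one compare of the max:
#include <algorithm>
static_assert(((3 < 5) || (7 < 5)) == (std::min(3, 7) < 5), "or-cmp-cmp -> min-cmp");
static_assert(((3 < 5) && (7 < 5)) == (std::max(3, 7) < 5), "and-cmp-cmp -> max-cmp");
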
6394 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6401 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6416 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6418 const APInt &APLhs = LHS1C->getAPIntValue();
6419 const APInt &APRhs = RHS1C->getAPIntValue();
6423 if (APLhs == (-APRhs) &&
6427 // (icmp eq A, C) | (icmp eq A, -C)
6428 // -> (icmp eq Abs(A), C)
6429 // (icmp ne A, C) & (icmp ne A, -C)
6430 // -> (icmp ne Abs(A), C)
6439 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6440 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6442 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6443 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6447 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6448 // -> ~A & smin(C0, C1) == 0
6450 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6451 // -> ~A & smin(C0, C1) != 0
6455 APInt Dif = MaxC - MinC;
6467 DAG.getConstant(-MinC, DL, OpVT));
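
// [Editorial illustration, not part of DAGCombiner.cpp] With C0 == 12 and
// C1 == 16 (difference 4, a power of two), A matches one of the two constants
// iff A - C0 has no bits outside that single difference bit:
constexpr bool viaTwoSetcc(unsigned a) { return a == 12 || a == 16; }
constexpr bool viaSubAndMask(unsigned a) { return ((a - 12) & ~4u) == 0; }
static_assert(viaTwoSetcc(12) == viaSubAndMask(12) && viaTwoSetcc(16) == viaSubAndMask(16) &&
              viaTwoSetcc(13) == viaSubAndMask(13) && viaTwoSetcc(20) == viaSubAndMask(20), "");
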
6480 // Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6515 // fold (and x, undef) -> 0
6523 // and(x, add) -> and(add, x)
6529 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6536 APInt ADDC = ADDI->getAPIntValue();
6537 APInt SRLC = SRLI->getAPIntValue();
6564 if (!AndC->getAPIntValue().isMask())
6567 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6570 EVT LoadedVT = LoadN->getMemoryVT();
6581 if (!LoadN->isSimple())
6584 // Do not generate loads of non-round integer types since these can
6608 // Do not generate loads of non-round integer types since these can
6614 if (!LDST->isSimple())
6617 EVT LdStMemVT = LDST->getMemoryVT();
6632 const Align LDSTAlign = LDST->getAlign();
6635 LDST->getAddressSpace(), NarrowAlign,
6636 LDST->getMemOperand()->getFlags()))
6641 EVT PtrType = LDST->getBasePtr().getValueType();
6653 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6657 // (the value loaded and the chain). Don't transform a pre-increment
6661 if (Load->getNumValues() > 2)
6667 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6668 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6677 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6681 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6694 for (SDValue Op : N->op_values()) {
6700 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6701 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6713 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6717 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6718 ExtVT.bitsGE(Load->getMemoryVT()))
6722 if (ExtVT.bitsLE(Load->getMemoryVT()))
6731 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6734 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6758 if (NodeToMask->getNumValues() > 1) {
6760 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6777 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6781 if (!Mask->getAPIntValue().isMask())
6785 if (isa<LoadSDNode>(N->getOperand(0)))
6795 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6796 SDValue MaskOp = N->getOperand(1);
6801 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6803 FixupNode->getValueType(0),
6812 SDValue Op0 = LogicN->getOperand(0);
6813 SDValue Op1 = LogicN->getOperand(1);
6831 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6832 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6843 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6850 // x & (-1 'logical shift' y)
6855 assert(N->getOpcode() == ISD::AND);
6857 SDValue N0 = N->getOperand(0);
6858 SDValue N1 = N->getOperand(1);
6864 // Try to match (-1 '[outer] logical shift' y)
6868 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6871 OuterShift = M->getOpcode();
6878 if (!isAllOnesConstant(M->getOperand(0)))
6880 Y = M->getOperand(1);
6893 EVT VT = N->getValueType(0);
6907 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6910 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6932 // Match a shift-right by constant.
6946 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6963 // Turn this into a bit-test pattern using mask op + setcc:
6964 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6965 // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
6971 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6975 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
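
// [Editorial illustration, not part of DAGCombiner.cpp] Testing an inverted
// bit by shifting and masking is the same as masking the original bit and
// comparing with zero, which is the bit-test form the combine produces:
constexpr bool viaShiftedNot(unsigned x, unsigned c) { return ((~x >> c) & 1u) != 0; }
constexpr bool viaBitTest(unsigned x, unsigned c) { return (x & (1u << c)) == 0; }
static_assert(viaShiftedNot(0b1010u, 1) == viaBitTest(0b1010u, 1), "bit set");
static_assert(viaShiftedNot(0b1010u, 2) == viaBitTest(0b1010u, 2), "bit clear");
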
6978 /// For targets that support usubsat, match a bit-hack form of that operation
6981 EVT VT = N->getValueType(0);
6985 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6986 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6991 m_SpecificInt(BitWidth - 1))))) &&
6994 m_SpecificInt(BitWidth - 1))))))
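
// [Editorial illustration, not part of DAGCombiner.cpp] For i8, (x s>> 7) is
// all-ones exactly when x >= 128 unsigned, and x ^ 128 adds or subtracts 128
// mod 256, so the masked value equals usubsat(x, 128). Helper names invented;
// assumes two's-complement conversions and arithmetic right shift (guaranteed
// since C++20):
#include <cstdint>
constexpr uint8_t bitHack(uint8_t x) {
  return uint8_t(x ^ 0x80) & uint8_t(int8_t(x) >> 7);  // mask is 0x00 or 0xFF
}
constexpr uint8_t usubsat128(uint8_t x) { return x >= 128 ? x - 128 : 0; }
static_assert(bitHack(200) == usubsat128(200) && bitHack(50) == usubsat128(50), "");
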
7004 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7007 unsigned LogicOpcode = N->getOpcode();
7023 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7024 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7040 EVT VT = N->getValueType(0);
7051 /// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7052 /// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7055 unsigned LogicOpcode = N->getOpcode();
7079 EVT VT = N->getValueType(0);
7085 SDValue N0 = N->getOperand(0);
7086 SDValue N1 = N->getOperand(1);
7090 // x & x --> x
7094 // fold (and c1, c2) -> c1&c2
7111 // fold (and x, 0) -> 0, vector edition
7117 // fold (and x, -1) -> x, vector edition
7124 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7125 EVT LoadVT = MLoad->getMemoryVT();
7133 if (Splat->getAPIntValue().isMask(ElementSize)) {
7135 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7136 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7137 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7138 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7149 // fold (and x, -1) -> x
7166 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7169 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7174 // fold (and (or x, C), D) -> D if (C & D) == D
7176 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7186 APInt Mask = ~N1C->getAPIntValue();
7189 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7193 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7194 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7203 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7209 N0->hasOneUse() && N0Op0->hasOneUse()) {
7222 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7242 Constant = C->getAPIntValue();
7244 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7255 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7279 Load->getValueType(0),
7280 Load->getMemoryVT());
7285 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7288 switch (Load->getExtensionType()) {
7303 if (Load->getExtensionType() == ISD::EXTLOAD) {
7304 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7305 Load->getValueType(0), SDLoc(Load),
7306 Load->getChain(), Load->getBasePtr(),
7307 Load->getOffset(), Load->getMemoryVT(),
7308 Load->getMemOperand());
7310 if (Load->getNumValues() == 3) {
7335 EVT ExtVT = Ext->getValueType(0);
7336 SDValue Extendee = Ext->getOperand(0);
7339 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7351 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7353 EVT MemVT = GN0->getMemoryVT();
7359 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7360 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7363 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7364 GN0->getIndexType(), ISD::ZEXTLOAD);
7373 // fold (and (load x), 255) -> (zextload x, i8)
7374 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7391 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7401 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7402 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7408 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7412 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7417 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7421 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7427 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7429 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7431 // Masking the negated extension of a boolean is just the zero-extended
7433 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7434 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7436 // Note: the SimplifyDemandedBits fold below can make an information-losing
7447 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7448 // fold (and (sra)) -> (and (srl)) when possible.
7452 // fold (zext_inreg (extload x)) -> (zextload x)
7453 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7458 EVT MemVT = LN0->getMemoryVT();
7463 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7465 ((!LegalOperations && LN0->isSimple()) ||
7468 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7469 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7476 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7477 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7496 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7503 if (!C->getAPIntValue().isMask(
7533 EVT VT = N->getValueType(0);
7547 if (!N0->hasOneUse())
7552 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7553 N01C->getZExtValue() != 0xFFFF))
7560 if (!N1->hasOneUse())
7563 if (!N11C || N11C->getZExtValue() != 0xFF)
7573 if (!N0->hasOneUse() || !N1->hasOneUse())
7580 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7584 SDValue N00 = N0->getOperand(0);
7586 if (!N00->hasOneUse())
7589 if (!N001C || N001C->getZExtValue() != 0xFF)
7595 SDValue N10 = N1->getOperand(0);
7597 if (!N10->hasOneUse())
7602 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7603 N101C->getZExtValue() != 0xFFFF))
7616 // If the left-shift isn't masked out then the only way this is a bswap is
7638 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7643 /// Return true if the specified node is an element that makes up a 32-bit
7650 if (!N->hasOneUse())
7672 switch (N1C->getZExtValue()) {
7697 if (!C || C->getZExtValue() != 8)
7705 if (!C || C->getZExtValue() != 8)
7714 if (!C || C->getZExtValue() != 8)
7722 if (!C || C->getZExtValue() != 8)
7741 if (!C || C->getAPIntValue() != 16)
7757 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7764 if (!N0->hasOneUse() || !N1->hasOneUse())
7770 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7771 Mask1->getAPIntValue() != 0x00ff00ff)
7781 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7792 /// Match a 32-bit packed halfword bswap. That is
7802 EVT VT = N->getValueType(0);
7864 // fold (or x, undef) -> -1
7871 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7874 (N0->hasOneUse() || N1->hasOneUse())) {
7883 const APInt &LHSMask = N0O1C->getAPIntValue();
7884 const APInt &RHSMask = N1O1C->getAPIntValue();
7897 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7902 (N0->hasOneUse() || N1->hasOneUse())) {
7919 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7920 return V->getOperand(0);
7930 // fold or (and x, y), x --> x
7934 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7943 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7954 // fold or (xor X, N1), N1 --> or X, N1
7958 // fold or (xor x, y), (x and/or y) --> or x, y
7968 if (V->getOpcode() == ISD::ZERO_EXTEND)
7969 return V->getOperand(0);
7973 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7979 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7985 // Attempt to match a legalized build_pair-esque pattern:
7993 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8009 SDValue N0 = N->getOperand(0);
8010 SDValue N1 = N->getOperand(1);
8014 // x | x --> x
8018 // fold (or c1, c2) -> c1|c2
8032 // fold (or x, 0) -> x, vector edition
8036 // fold (or x, -1) -> -1, vector edition
8041 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8056 SmallVector<int, 4> Mask(NumElts, -1);
8059 int M0 = SV0->getMaskElt(i);
8060 int M1 = SV1->getMaskElt(i);
8079 // We have a zero and non-zero element. If the non-zero came from
8098 // fold (or x, 0) -> x
8102 // fold (or x, -1) -> -1
8109 // fold (or x, c) -> c iff (x & ~c) == 0
8111 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8130 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8133 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8138 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8141 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8143 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8158 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8170 // Simplify the operands using demanded-bits information.
8218 /// (or (add v v) (srl v bitwidth-1)):
8219 /// expands (add v v) -> (shl v 1)
8222 /// expands (mul v c0) -> (shl (mul v c1) c3)
8225 /// expands (udiv v c0) -> (srl (udiv v c1) c3)
8228 /// expands (shl v c0) -> (shl (shl v c1) c3)
8231 /// expands (srl v c0) -> (srl (srl v c1) c3)
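// --- Illustrative sketch (not part of this file): how the expansions listed
// above let a shift hide inside another op. For the (mul v c0) form, with
// c0 = c1 << c3 (here 24 = 3 << 3):
#include <cstdint>
static uint32_t hidden(uint32_t V)  { return V * 24u; }        // (mul v 24)
static uint32_t exposed(uint32_t V) { return (V * 3u) << 3; }  // (shl (mul v 3) 3)
// hidden(V) == exposed(V) for all V, so a shl by 3 can be peeled off the
// multiply when looking for the shifted half of a rotate pattern.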
8250 // (add v v) -> (shl v 1)
8256 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8293 // TODO: We should be able to handle non-uniform constant vectors for these values
8295 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8296 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8297 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8302 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8304 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8306 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8307 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8315 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8316 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8327 // c2 - (bitwidth(op0 v c0) - c1) == c0
8328 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8341 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8357 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8358 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8360 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8363 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8365 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8370 // Neg == EltSize - Pos [B]
8383 // always invokes undefined behavior for 32-bit X.
8385 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8387 // un-demanded bits.
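// --- Illustrative sketch (not part of this file): identity [A] for a
// power-of-2 element size, here EltSize = 32.
#include <cstdint>
static uint32_t rotlViaMaskedSub(uint32_t X, uint32_t Pos) {
  // Neg = (EltSize - Pos) & (EltSize - 1) stays correct even for Pos == 0,
  // where a plain shift by (32 - Pos) == 32 would be out of range.
  uint32_t Neg = (32u - Pos) & 31u;
  return (X << (Pos & 31u)) | (X >> Neg);
}
// For Pos == 0 both shifts are by 0 and the OR reproduces X; otherwise this
// is the usual (x << Pos) | (x >> (32 - Pos)) rotate.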
8429 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8442 Width = NegC->getAPIntValue();
8447 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8451 // NegC & Mask == (EltSize - PosC) & Mask
8455 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8463 // EltSize & Mask is 0 since Mask is EltSize - 1.
8469 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8479 // (srl x, (*ext (sub 32, y)))) ->
8483 // (srl x, (*ext y))) ->
8496 // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8510 // (srl x1, (*ext (sub 32, y)))) ->
8514 // (srl x1, (*ext y))) ->
8529 return Cst && (Cst->getAPIntValue() == Imm);
8533 // -> (fshl x0, x1, y)
8535 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8542 // -> (fshr x0, x1, y)
8544 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8551 // -> (fshr x0, x1, y)
8552 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8554 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8564 // MatchRotate - Handle an 'or' of two operands. If this is one of the many idioms for rotate, and if the target supports rotation instructions, generate a rot[lr].
8572 // We still try to match rotate by constant pre-legalization.
8573 // TODO: Support pre-legalization funnel-shift by constant.
8580 // lowering for rotate, allow matching rotate by non-constants. Only allow
8648 // Something has gone wrong - we've lost the shl/srl pair - bail.
8660 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8686 // TODO: Support pre-legalization funnel-shift by constant.
8712 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8717 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8731 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8732 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8733 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8734 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8751 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8761 // If the shift amount is sign/zext/any-extended just peel it off.
8814 /// However, there is a special case when dealing with vector loads -- we allow
8849 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8853 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8854 if (Depth == 10)
8859 if (Depth && !Op.hasOneUse() &&
8878 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8882 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8886 if (LHS->isConstantZero())
8888 if (RHS->isConstantZero())
8893 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8897 uint64_t BitShift = ShiftOp->getZExtValue();
8908 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8909 Depth + 1, VectorIndex, Index);
8914 SDValue NarrowOp = Op->getOperand(0);
8925 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8929 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8930 Depth + 1, VectorIndex, StartingIndex);
8932 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8936 VectorIndex = OffsetOp->getZExtValue();
8938 SDValue NarrowOp = Op->getOperand(0);
8959 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8964 if (!L->isSimple() || L->isIndexed())
8967 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8976 return L->getExtensionType() == ISD::ZEXTLOAD
8994 return BW - i - 1;
9009 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9064 // We only handle merging simple stores of 1-4 bytes.
9066 EVT MemVT = N->getMemoryVT();
9068 !N->isSimple() || N->isIndexed())
9072 SDValue Chain = N->getChain();
9085 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9086 Store->isIndexed() || !Store->hasOneUse())
9089 Chain = Store->getChain();
9115 SDValue Trunc = Store->getValue();
9135 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9165 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9187 *FirstStore->getMemOperand(), &Fast);
9199 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9210 // Special-case: check if byte offsets line up for the opposite endian.
9238 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9239 FirstStore->getPointerInfo(), FirstStore->getAlign());
9277 assert(N->getOpcode() == ISD::OR &&
9281 EVT VT = N->getValueType(0);
9291 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9311 for (int i = ByteWidth - 1; i >= 0; --i) {
9318 if (P->isConstantZero()) {
9320 // zero-extend the load.
9321 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9325 assert(P->hasSrc() && "provenance should either be memory or zero");
9326 auto *L = cast<LoadSDNode>(P->Src.value());
9329 SDValue LChain = L->getChain();
9346 if (L->getMemoryVT().isVector()) {
9347 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9350 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9357 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9381 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9407 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9410 // replace it with a single (possibly zero-extended) load and bswap + shift if
9419 // We do not introduce illegal bswaps when zero-extending as this tends to
9435 *FirstLoad->getMemOperand(), &Fast);
9441 Chain, FirstLoad->getBasePtr(),
9442 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
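// --- Illustrative sketch (not part of this file, and simplified): the shape
// of the offset check this load combine performs. Byte i of the OR must come
// from load offset i (the BW - i - 1 computation above handles the reversed
// numbering); an identity permutation needs no bswap, the reversed one does,
// and anything else bails.
#include <cstdint>
#include <optional>
#include <vector>
enum class NeedsBswap { No, Yes };
static std::optional<NeedsBswap> classify(const std::vector<int64_t> &Offsets) {
  const size_t BW = Offsets.size();
  bool Fwd = true, Rev = true;
  for (size_t i = 0; i < BW; ++i) {
    Fwd &= Offsets[i] == (int64_t)i;
    Rev &= Offsets[i] == (int64_t)(BW - i - 1);
  }
  if (Fwd) return NeedsBswap::No;
  if (Rev) return NeedsBswap::Yes;
  return std::nullopt; // not one contiguous load; the real code also weighs
                       // target endianness and allows zero-extended high bytes
}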
9459 // If the target has andn, bsl, or a similar bit-select instruction,
9476 assert(N->getOpcode() == ISD::XOR);
9478 // Don't touch 'not' (i.e. where y = -1).
9479 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9482 EVT VT = N->getValueType(0);
9495 // Don't touch 'not' (i.e. where y = -1).
9508 SDValue N0 = N->getOperand(0);
9509 SDValue N1 = N->getOperand(1);
9559 SDValue N0 = N->getOperand(0);
9560 SDValue N1 = N->getOperand(1);
9564 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9568 // fold (xor x, undef) -> undef
9574 // fold (xor c1, c2) -> c1^c2
9588 // fold (xor x, 0) -> x, vector edition
9593 // fold (xor x, 0) -> x
9601 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9604 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9609 // fold (a^b) -> (a|b) iff a and b share no bits.
9614 // look for 'add-like' folds:
9621 // fold !(x cc y) -> (x !cc y)
9626 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9657 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9668 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9669 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9681 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9682 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9695 // fold (not (neg x)) -> (add X, -1)
9696 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9704 // fold (not (add X, -1)) -> (neg X)
9710 // fold (xor (and x, y), y) -> (and (not x), y)
9711 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9718 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9727 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9732 // fold (xor x, x) -> 0
9736 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9748 // - Try to see the operation as placing a single zero in a value of all ones.
9749 // - There exists no value for x which would allow the result to contain zero.
9750 // - Values of x larger than the bitwidth are undefined and do not require a consistent result.
9752 // - Pushing the zero left requires shifting one bits in from the right.
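// --- Illustrative sketch (not part of this file): the reasoning above as a
// checkable identity for i32.
#include <cstdint>
static uint32_t viaXor(uint32_t X)  { return (1u << X) ^ 0xffffffffu; }        // (xor (shl 1, x), -1)
static uint32_t viaRotl(uint32_t X) { return (~1u << X) | (~1u >> (32 - X)); } // (rotl ~1, x)
// For X in [1, 31] both produce a value with a single zero bit at position X.
// X == 0 also satisfies the DAG-level identity, but the C++ expression above
// would shift by 32, so a masked rotate is needed to demonstrate that case.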
9760 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9776 // Simplify the expression using non-local knowledge.
9786 /// If we have a shift-by-constant of a bitwise logic op that itself has a
9787 /// shift-by-constant operand with identical opcode, we may be able to convert
9791 // Match a one-use bitwise logic op.
9792 SDValue LogicOp = Shift->getOperand(0);
9801 // Find a matching one-use shift by constant.
9802 unsigned ShiftOpcode = Shift->getOpcode();
9803 SDValue C1 = Shift->getOperand(1);
9806 const APInt &C1Val = C1Node->getAPIntValue();
9818 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9822 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9849 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9851 EVT VT = Shift->getValueType(0);
9852 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9857 LogicOp->getFlags());
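// --- Illustrative sketch (not part of this file): the distribution this fold
// relies on, for the shl/and case with C0 = 3 and C1 = 2.
#include <cstdint>
static uint32_t before(uint32_t X, uint32_t Y) { return ((X << 3) & Y) << 2; }
static uint32_t after (uint32_t X, uint32_t Y) { return (X << 5) & (Y << 2); }
// before(X, Y) == after(X, Y) for all X, Y: a constant shift distributes over
// the bitwise logic op, letting the two shifts of X merge into one.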
9867 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9870 if (isBitwiseNot(N->getOperand(0)))
9873 // The inner binop must be one-use, since we want to replace it.
9874 SDValue LHS = N->getOperand(0);
9878 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9894 if (N->getOpcode() != ISD::SHL)
9913 if (IsCopyOrSelect && N->hasOneUse())
9918 EVT VT = N->getValueType(0);
9920 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9921 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9922 N->getOperand(1));
9930 assert(N->getOpcode() == ISD::TRUNCATE);
9931 assert(N->getOperand(0).getOpcode() == ISD::AND);
9933 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9934 EVT TruncVT = N->getValueType(0);
9935 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9937 SDValue N01 = N->getOperand(0).getOperand(1);
9940 SDValue N00 = N->getOperand(0).getOperand(0);
9954 SDValue N0 = N->getOperand(0);
9955 SDValue N1 = N->getOperand(1);
9956 EVT VT = N->getValueType(0);
9959 // fold (rot x, 0) -> x
9963 // fold (rot x, c) -> x iff (c % BitSize) == 0
9965 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9970 // fold (rot x, c) -> (rot x, c % BitSize)
9973 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9981 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9984 // rot i16 X, 8 --> bswap X
9986 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9990 // Simplify the operands using demanded-bits information.
9994 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9998 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10004 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
10010 bool SameSide = (N->getOpcode() == NextOp);
10024 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10033 SDValue N0 = N->getOperand(0);
10034 SDValue N1 = N->getOperand(1);
10043 // fold (shl c1, c2) -> c1<<c2
10053 // If setcc produces all-one true value then:
10054 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10055 if (N1CV && N1CV->isConstant()) {
10057 SDValue N00 = N0->getOperand(0);
10058 SDValue N01 = N0->getOperand(1);
10061 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10079 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10086 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10090 APInt c1 = LHS->getAPIntValue();
10091 APInt c2 = RHS->getAPIntValue();
10100 APInt c1 = LHS->getAPIntValue();
10101 APInt c2 = RHS->getAPIntValue();
10111 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10127 APInt c1 = LHS->getAPIntValue();
10128 APInt c2 = RHS->getAPIntValue();
10130 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10140 APInt c1 = LHS->getAPIntValue();
10141 APInt c2 = RHS->getAPIntValue();
10143 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10156 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10165 APInt c1 = LHS->getAPIntValue();
10166 APInt c2 = RHS->getAPIntValue();
10184 const APInt &LHSC = LHS->getAPIntValue();
10185 const APInt &RHSC = RHS->getAPIntValue();
10190 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10191 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10192 if (N0->getFlags().hasExact()) {
10209 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
10211 // Only fold this if the inner shift has no other uses -- if it does,
10240 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10248 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10249 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10261 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10267 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10272 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10287 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10288 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10296 if (N1C && !N1C->isOpaque())
10300 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the target.
10321 const APInt &C1 = N1C->getAPIntValue();
10339 // Transform a right shift of a multiply into a multiply-high.
10341 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10342 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
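// --- Illustrative sketch (not part of this file): the scalar shape being
// matched, i.e. what mulhu looks like before the combine.
#include <cstdint>
static uint32_t mulhuPattern(uint32_t A, uint32_t B) {
  // (srl (mul (zext $a), (zext $b)), 32): the high half of the widened product.
  return (uint32_t)(((uint64_t)A * (uint64_t)B) >> 32);
}
// When the target provides MULHU/MULHS, this becomes a single node and the
// 64-bit multiply is avoided.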
10345 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10350 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10355 SDValue ShiftOperand = N->getOperand(0);
10374 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10377 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10381 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10390 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10397 ? Constant->getAPIntValue().getSignificantBits()
10398 : Constant->getAPIntValue().getActiveBits();
10402 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10426 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10449 bool IsSigned = N->getOpcode() == ISD::SRA;
10453 // fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10456 unsigned Opcode = N->getOpcode();
10460 SDValue N0 = N->getOperand(0);
10461 EVT VT = N->getValueType(0);
10490 SDValue N0 = N->getOperand(0);
10491 SDValue N1 = N->getOperand(1);
10499 // fold (sra c1, c2) -> c1 >>s c2
10503 // Arithmetic shifting an all-sign-bit value is a no-op.
10504 // fold (sra 0, x) -> 0
10505 // fold (sra -1, x) -> -1
10519 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10527 APInt c1 = LHS->getAPIntValue();
10528 APInt c2 = RHS->getAPIntValue();
10532 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
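// --- Illustrative sketch (not part of this file): why the summed shift
// amount is clamped to OpSizeInBits - 1 rather than wrapped, assuming the
// usual arithmetic right shift for signed integers.
#include <cstdint>
static int32_t twoShifts(int32_t X) { return (X >> 20) >> 20; } // c1 + c2 = 40
static int32_t clamped(int32_t X)   { return X >> 31; }
// Once every bit is a copy of the sign bit, further arithmetic shifts change
// nothing, so twoShifts(X) == clamped(X) even though 40 >= 32.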
10552 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10553 // result_size - n != m.
10562 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10567 // Determine the residual right-shift amount.
10568 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10570 // If the shift is not a no-op (in which case this should be just a sign
10584 N->getValueType(0), Trunc);
10590 // sra (add (shl X, N1C), AddC), N1C -->
10591 // sext (add (trunc X to (width - N1C)), AddC')
10592 // sra (sub AddC, (shl X, N1C)), N1C -->
10593 // sext (sub AddC1',(trunc X to (width - N1C)))
10606 unsigned ShiftAmt = N1C->getZExtValue();
10607 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10612 // implementation and/or target-specific overrides (because
10613 // non-simple types likely require masking when legalized), but
10619 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10633 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10640 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10641 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10643 // TODO - support non-uniform vector shift amounts.
10652 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10653 if (LargeShift->getAPIntValue() == TruncBits) {
10673 if (N1C && !N1C->isOpaque())
10677 // Try to transform this shift into a multiply-high if
10682 // Attempt to convert a sra of a load into a narrower sign-extending load.
10693 SDValue N0 = N->getOperand(0);
10694 SDValue N1 = N->getOperand(1);
10703 // fold (srl c1, c2) -> c1 >>u c2
10721 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10725 APInt c1 = LHS->getAPIntValue();
10726 APInt c2 = RHS->getAPIntValue();
10735 APInt c1 = LHS->getAPIntValue();
10736 APInt c2 = RHS->getAPIntValue();
10749 // TODO - support non-uniform vector shift amounts.
10751 uint64_t c1 = N001C->getZExtValue();
10752 uint64_t c2 = N1C->getZExtValue();
10756 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10767 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10774 OpSizeInBits - c2),
10782 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
10785 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10789 const APInt &LHSC = LHS->getAPIntValue();
10790 const APInt &RHSC = RHS->getAPIntValue();
10817 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10818 // TODO - support non-uniform vector shift amounts.
10823 if (N1C->getAPIntValue().uge(BitSize))
10827 uint64_t ShiftAmt = N1C->getZExtValue();
10833 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10840 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10842 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10847 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power-of-2 bitwidth (the "5" is log2 of that bitwidth).
10851 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10882 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10894 if (N1C && !N1C->isOpaque())
10898 // Attempt to convert a srl of a load into a narrower zero-extending load.
10926 if (N->hasOneUse()) {
10927 SDNode *User = *N->user_begin();
10930 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
10931 User = *User->user_begin();
10933 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
10934 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
10938 // Try to transform this shift into a multiply-high if
10950 EVT VT = N->getValueType(0);
10951 SDValue N0 = N->getOperand(0);
10952 SDValue N1 = N->getOperand(1);
10953 SDValue N2 = N->getOperand(2);
10954 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10958 // fold (fshl N0, N1, 0) -> N0
10959 // fold (fshr N0, N1, 0) -> N1
10962 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10969 // TODO - support non-uniform vector shift amounts.
10973 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10974 if (Cst->getAPIntValue().uge(BitWidth)) {
10975 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10976 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10980 unsigned ShAmt = Cst->getZExtValue();
10984 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10985 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10986 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10987 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10991 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10995 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
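// --- Illustrative sketch (not part of this file): the zero-operand funnel
// shift folds above, for i32 and 0 < C < 32.
#include <cstdint>
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned C) {
  return (Hi << C) | (Lo >> (32 - C)); // generic fshl for in-range C
}
// fshl32(0, N1, C) == N1 >> (32 - C)  -> the lshr(N1, BW-C) fold
// fshl32(N0, 0, C) == N0 << C         -> the shl(N0, C) fold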
10997 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10998 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10999 // TODO - bigendian support once we have test coverage.
11000 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11001 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11006 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11007 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11008 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
11013 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11014 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11017 RHS->getAddressSpace(), NewAlign,
11018 RHS->getMemOperand()->getFlags(), &Fast) &&
11021 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11024 VT, DL, RHS->getChain(), NewPtr,
11025 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11026 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11037 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11038 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11042 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11049 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11050 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11053 // non-constant (BW - N2).
11066 SDValue N0 = N->getOperand(0);
11067 SDValue N1 = N->getOperand(1);
11074 // fold (*shlsat c1, c2) -> c1<<c2
11075 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11081 // fold (sshlsat x, c) -> (shl x, c)
11082 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11083 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11086 // fold (ushlsat x, c) -> (shl x, c)
11087 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11088 N1C->getAPIntValue().ule(
11101 EVT SrcVT = N->getValueType(0);
11103 if (N->getOpcode() == ISD::TRUNCATE)
11104 N = N->getOperand(0).getNode();
11106 if (N->getOpcode() != ISD::ABS)
11109 EVT VT = N->getValueType(0);
11110 SDValue AbsOp1 = N->getOperand(0);
11121 // Check if the operands of the sub are (zero|sign)-extended.
11126 // fold (abs (sub nsw x, y)) -> abds(x, y)
11128 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
11138 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11139 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11146 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11147 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11149 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11150 (VT1 == MaxVT || Op1->hasOneUse()) &&
11159 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11160 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11170 SDValue N0 = N->getOperand(0);
11171 EVT VT = N->getValueType(0);
11174 // fold (abs c1) -> c2
11177 // fold (abs (abs x)) -> (abs x)
11180 // fold (abs x) -> x iff not-negative
11187 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11190 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11205 SDValue N0 = N->getOperand(0);
11206 EVT VT = N->getValueType(0);
11209 // fold (bswap c1) -> c2
11212 // fold (bswap (bswap x)) -> x
11216 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11225 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11231 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11232 ShAmt->getZExtValue() >= (BW / 2) &&
11233 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11237 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11246 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11247 // inverse-shift-of-bswap:
11248 // bswap (X u<< C) --> (bswap X) u>> C
11249 // bswap (X u>> C) --> (bswap X) u<< C
11253 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11254 ShAmt->getZExtValue() % 8 == 0) {
11268 SDValue N0 = N->getOperand(0);
11269 EVT VT = N->getValueType(0);
11272 // fold (bitreverse c1) -> c2
11276 // fold (bitreverse (bitreverse x)) -> x
11282 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11287 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11296 SDValue N0 = N->getOperand(0);
11297 EVT VT = N->getValueType(0);
11300 // fold (ctlz c1) -> c2
11313 SDValue N0 = N->getOperand(0);
11314 EVT VT = N->getValueType(0);
11317 // fold (ctlz_zero_undef c1) -> c2
11325 SDValue N0 = N->getOperand(0);
11326 EVT VT = N->getValueType(0);
11329 // fold (cttz c1) -> c2
11342 SDValue N0 = N->getOperand(0);
11343 EVT VT = N->getValueType(0);
11346 // fold (cttz_zero_undef c1) -> c2
11354 SDValue N0 = N->getOperand(0);
11355 EVT VT = N->getValueType(0);
11359 // fold (ctpop c1) -> c2
11367 const APInt &Amt = AmtC->getAPIntValue();
11460 const unsigned Opcode = N->getOpcode();
11462 // Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
11467 auto VT = N->getValueType(0);
11497 if (IsUnsigned && !Add->getFlags().hasNoUnsignedWrap())
11500 if (!IsUnsigned && !Add->getFlags().hasNoSignedWrap())
11503 return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
11525 // select (setcc x, K), (fneg x), -K -> fneg(minnum(x, K))
11547 /// If a (v)select has a condition value that is a sign-bit test, try to smear
11548 /// the condition operand sign-bit across the value width and use it as a mask.
11551 SDValue Cond = N->getOperand(0);
11552 SDValue C1 = N->getOperand(1);
11553 SDValue C2 = N->getOperand(2);
11557 EVT VT = N->getValueType(0);
11562 // The inverted-condition + commuted-select variants of these patterns are
11566 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11569 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11570 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11575 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11576 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
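// --- Illustrative sketch (not part of this file): the two patterns above for
// i32, with the sign bit smeared across the width as a mask (the i8 variant
// uses 7 in place of 31).
#include <cstdint>
static int32_t gtMinusOne(int32_t X, int32_t C1) { return (X >> 31) | C1; } // X > -1 ? C1 : -1
static int32_t ltZero(int32_t X, int32_t C1)     { return (X >> 31) & C1; } // X < 0  ? C1 : 0
// X >>s 31 is 0 for non-negative X and all-ones otherwise, so the OR yields
// -1 exactly when X is negative and the AND yields C1 exactly then.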
11588 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11593 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11603 SDValue Cond = N->getOperand(0);
11604 SDValue N1 = N->getOperand(1);
11605 SDValue N2 = N->getOperand(2);
11606 EVT VT = N->getValueType(0);
11619 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11622 // have an integer-based boolean or a floating-point-based boolean unless we
11633 C1->isZero() && C2->isOne()) {
11644 // Only do this before legalization to avoid conflicting with target-specific
11646 // is also a target-independent combine here in DAGCombiner in the other
11647 // direction for (select Cond, -1, 0) when the condition is not i1.
11650 // select Cond, 1, 0 --> zext (Cond)
11651 if (C1->isOne() && C2->isZero())
11654 // select Cond, -1, 0 --> sext (Cond)
11655 if (C1->isAllOnes() && C2->isZero())
11658 // select Cond, 0, 1 --> zext (!Cond)
11659 if (C1->isZero() && C2->isOne()) {
11665 // select Cond, 0, -1 --> sext (!Cond)
11666 if (C1->isZero() && C2->isAllOnes()) {
11679 const APInt &C1Val = C1->getAPIntValue();
11680 const APInt &C2Val = C2->getAPIntValue();
11682 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11683 if (C1Val - 1 == C2Val) {
11688 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11694 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11702 // select Cond, -1, C --> or (sext Cond), C
11703 if (C1->isAllOnes()) {
11708 // select Cond, C, -1 --> or (sext (not Cond)), C
11709 if (C2->isAllOnes()) {
11724 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11725 N->getOpcode() == ISD::VP_SELECT) &&
11727 SDValue Cond = N->getOperand(0);
11728 SDValue T = N->getOperand(1), F = N->getOperand(2);
11729 EVT VT = N->getValueType(0);
11736 // select Cond, Cond, F --> or Cond, freeze(F)
11737 // select Cond, 1, F --> or Cond, freeze(F)
11741 // select Cond, T, Cond --> and Cond, freeze(T)
11742 // select Cond, T, 0 --> and Cond, freeze(T)
11746 // select Cond, T, 1 --> or (not Cond), freeze(T)
11753 // select Cond, 0, F --> and (not Cond), freeze(F)
11764 SDValue N0 = N->getOperand(0);
11765 SDValue N1 = N->getOperand(1);
11766 SDValue N2 = N->getOperand(2);
11767 EVT VT = N->getValueType(0);
11778 // compare is inverted from that pattern ("Cond0 s> -1").
11786 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
11789 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11794 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
11797 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11804 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
11808 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11816 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
11822 // (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
11823 // (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
11824 // (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
11825 // (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
11869 SDValue N0 = N->getOperand(0);
11870 SDValue N1 = N->getOperand(1);
11871 SDValue N2 = N->getOperand(2);
11872 EVT VT = N->getValueType(0);
11875 SDNodeFlags Flags = N->getFlags();
11883 // select (not Cond), N1, N2 -> select Cond, N2, N1
11886 SelectOp->setFlags(Flags);
11909 // -> select Cond0, (select Cond1, X, Y), Y
11910 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11911 SDValue Cond0 = N0->getOperand(0);
11912 SDValue Cond1 = N0->getOperand(1);
11922 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11923 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11924 SDValue Cond0 = N0->getOperand(0);
11925 SDValue Cond1 = N0->getOperand(1);
11936 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11937 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11938 SDValue N1_0 = N1->getOperand(0);
11939 SDValue N1_1 = N1->getOperand(1);
11940 SDValue N1_2 = N1->getOperand(2);
11955 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11956 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11957 SDValue N2_0 = N2->getOperand(0);
11958 SDValue N2_1 = N2->getOperand(1);
11959 SDValue N2_2 = N2->getOperand(2);
11974 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
11982 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
11996 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11998 // select (fcmp lt x, y), x, y -> fminnum x, y
11999 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12008 // This is conservatively limited to pre-legal-operations to give targets
12017 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12018 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12019 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12024 // %r = select %c, -1, %a
12029 // %r = select %u1, -1, %u0
12041 Flags = N0->getFlags();
12044 SelectNode->setFlags(Flags);
12069 SDValue Cond = N->getOperand(0);
12070 SDValue LHS = N->getOperand(1);
12071 SDValue RHS = N->getOperand(2);
12072 EVT VT = N->getValueType(0);
12080 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12087 // length of the BV and see if all the non-undef nodes are the same.
12090 if (Cond->getOperand(i)->isUndef())
12095 else if (Cond->getOperand(i).getNode() != BottomHalf)
12102 if (Cond->getOperand(i)->isUndef())
12107 else if (Cond->getOperand(i).getNode() != TopHalf)
12116 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12117 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12189 SDValue Mask = MSC->getMask();
12190 SDValue Chain = MSC->getChain();
12191 SDValue Index = MSC->getIndex();
12192 SDValue Scale = MSC->getScale();
12193 SDValue StoreVal = MSC->getValue();
12194 SDValue BasePtr = MSC->getBasePtr();
12195 SDValue VL = MSC->getVectorLength();
12196 ISD::MemIndexType IndexType = MSC->getIndexType();
12203 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12205 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12206 DL, Ops, MSC->getMemOperand(), IndexType);
12211 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12212 DL, Ops, MSC->getMemOperand(), IndexType);
12220 SDValue Mask = MSC->getMask();
12221 SDValue Chain = MSC->getChain();
12222 SDValue Index = MSC->getIndex();
12223 SDValue Scale = MSC->getScale();
12224 SDValue StoreVal = MSC->getValue();
12225 SDValue BasePtr = MSC->getBasePtr();
12226 ISD::MemIndexType IndexType = MSC->getIndexType();
12233 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12235 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12236 DL, Ops, MSC->getMemOperand(), IndexType,
12237 MSC->isTruncatingStore());
12242 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12243 DL, Ops, MSC->getMemOperand(), IndexType,
12244 MSC->isTruncatingStore());
12252 SDValue Mask = MST->getMask();
12253 SDValue Chain = MST->getChain();
12254 SDValue Value = MST->getValue();
12255 SDValue Ptr = MST->getBasePtr();
12264 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12265 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12266 !MST->getBasePtr().isUndef() &&
12267 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12268 MST1->getMemoryVT().getStoreSize()) ||
12270 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12271 MST->getMemoryVT().getStoreSize())) {
12272 CombineTo(MST1, MST1->getChain());
12273 if (N->getOpcode() != ISD::DELETED_NODE)
12281 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12282 !MST->isCompressingStore() && !MST->isTruncatingStore())
12283 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12284 MST->getBasePtr(), MST->getPointerInfo(),
12285 MST->getOriginalAlign(),
12286 MST->getMemOperand()->getFlags(), MST->getAAInfo());
12292 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12295 !cast<ConstantSDNode>(Value)->isOpaque())) {
12298 MST->getMemoryVT().getScalarSizeInBits());
12303 // Re-visit the store if anything changed and the store hasn't been merged
12305 // node back to the worklist if necessary, but we also need to re-visit
12307 if (N->getOpcode() != ISD::DELETED_NODE)
12317 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12318 MST->isUnindexed() && !MST->isCompressingStore() &&
12320 MST->getMemoryVT(), LegalOperations)) {
12321 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12324 MST->getOffset(), Mask, MST->getMemoryVT(),
12325 MST->getMemOperand(), MST->getAddressingMode(),
12334 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12335 // Combine strided stores with unit-stride to a regular VP store.
12336 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12337 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12338 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12339 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12340 SST->getVectorLength(), SST->getMemoryVT(),
12341 SST->getMemOperand(), SST->getAddressingMode(),
12342 SST->isTruncatingStore(), SST->isCompressingStore());
12349 SDValue Vec = N->getOperand(0);
12350 SDValue Mask = N->getOperand(1);
12351 SDValue Passthru = N->getOperand(2);
12398 SDValue Mask = MGT->getMask();
12399 SDValue Chain = MGT->getChain();
12400 SDValue Index = MGT->getIndex();
12401 SDValue Scale = MGT->getScale();
12402 SDValue BasePtr = MGT->getBasePtr();
12403 SDValue VL = MGT->getVectorLength();
12404 ISD::MemIndexType IndexType = MGT->getIndexType();
12407 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12410 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12411 Ops, MGT->getMemOperand(), IndexType);
12414 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12417 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12418 Ops, MGT->getMemOperand(), IndexType);
12426 SDValue Mask = MGT->getMask();
12427 SDValue Chain = MGT->getChain();
12428 SDValue Index = MGT->getIndex();
12429 SDValue Scale = MGT->getScale();
12430 SDValue PassThru = MGT->getPassThru();
12431 SDValue BasePtr = MGT->getBasePtr();
12432 ISD::MemIndexType IndexType = MGT->getIndexType();
12437 return CombineTo(N, PassThru, MGT->getChain());
12439 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12442 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12443 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12446 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12449 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12450 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12458 SDValue Mask = MLD->getMask();
12463 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12467 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12468 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12470 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12471 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12472 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12485 SDValue Chain = HG->getChain();
12486 SDValue Inc = HG->getInc();
12487 SDValue Mask = HG->getMask();
12488 SDValue BasePtr = HG->getBasePtr();
12489 SDValue Index = HG->getIndex();
12492 EVT MemVT = HG->getMemoryVT();
12493 MachineMemOperand *MMO = HG->getMemOperand();
12494 ISD::MemIndexType IndexType = HG->getIndexType();
12500 HG->getScale(), HG->getIntID()};
12501 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
12514 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12515 // Combine strided loads with unit-stride to a regular VP load.
12516 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12517 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12519 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12520 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12521 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12522 SLD->getMemOperand(), SLD->isExpandingLoad());
12531 SDValue Cond = N->getOperand(0);
12532 SDValue N1 = N->getOperand(1);
12533 SDValue N2 = N->getOperand(2);
12534 EVT VT = N->getValueType(0);
12558 const APInt &C1 = N1Elt->getAsAPIntVal();
12559 const APInt &C2 = N2Elt->getAsAPIntVal();
12562 if (C1 != C2 - 1)
12566 // Further simplifications for the extra-special cases where the constants are
12567 // all 0 or all -1 should be implemented as folds of these patterns.
12570 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12571 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12577 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12589 // The general case for select-of-constants:
12590 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12592 // leave that to a machine-specific pass.
12597 SDValue N0 = N->getOperand(0);
12598 SDValue N1 = N->getOperand(1);
12599 SDValue N2 = N->getOperand(2);
12612 SDValue N0 = N->getOperand(0);
12613 SDValue N1 = N->getOperand(1);
12614 SDValue N2 = N->getOperand(2);
12615 EVT VT = N->getValueType(0);
12624 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12628 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12629 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12640 // vselect (setg[te] X, 0), X, -X ->
12641 // vselect (setgt X, -1), X, -X ->
12642 // vselect (setl[te] X, 0), -X, X ->
12643 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12646 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12664 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
12671 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12672 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12678 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12691 // TODO: This could be extended to handle non-loading patterns, but that
12707 // vselect (ext (setcc load(X), C)), N1, N2 -->
12745 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12746 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12757 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12759 return Cond->getAPIntValue() == ~Op->getAPIntValue();
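// --- Illustrative sketch (not part of this file): the overflow idiom that
// becomes uaddsat, in scalar form.
#include <cstdint>
static uint32_t uaddsatPattern(uint32_t X, uint32_t Y) {
  uint32_t Sum = X + Y;        // wraps on overflow
  return X <= Sum ? Sum : ~0u; // x <= x+y ? x+y : ~0
}
// Unsigned addition overflows exactly when the wrapped sum compares smaller
// than an operand; the constant form x >= ~C ? x+C : ~0 works the same way.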
12781 // zext(x) >= y ? trunc(zext(x) - y) : 0
12782 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12783 // zext(x) > y ? trunc(zext(x) - y) : 0
12784 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12802 // x >= y ? x-y : 0 --> usubsat x, y
12803 // x > y ? x-y : 0 --> usubsat x, y
12814 // x > C-1 ? x+-C : 0 --> usubsat x, C
12818 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
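// --- Illustrative sketch (not part of this file): the usubsat idioms above.
#include <cstdint>
static uint32_t usubsatVar(uint32_t X, uint32_t Y) {
  return X >= Y ? X - Y : 0;    // x >= y ? x-y : 0
}
static uint32_t usubsatConst(uint32_t X) {
  const uint32_t C = 100;       // hypothetical constant for illustration
  return X > C - 1 ? X - C : 0; // x > C-1 ? x+-C : 0 (an add of -C)
}
// Both clamp the subtraction at zero instead of wrapping, which is exactly
// USUBSAT's semantics.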
12831 // x s< 0 ? x^C : 0 --> usubsat x, C
12852 // Fold (vselect all_ones, N1, N2) -> N1
12855 // Fold (vselect all_zeros, N1, N2) -> N2
12883 SDValue N0 = N->getOperand(0);
12884 SDValue N1 = N->getOperand(1);
12885 SDValue N2 = N->getOperand(2);
12886 SDValue N3 = N->getOperand(3);
12887 SDValue N4 = N->getOperand(4);
12888 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12891 // fold select_cc lhs, rhs, x, x, cc -> x
12895 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12905 // cond always true -> true val
12906 // cond always false -> false val
12908 return SCCC->isZero() ? N3 : N2;
12912 if (SCC->isUndef())
12920 SelectOp->setFlags(SCC->getFlags());
12938 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
12940 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12941 EVT VT = N->getValueType(0);
12942 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13008 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13013 return CNode->getAPIntValue();
13022 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13030 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13033 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13038 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13050 NumBits - ShiftCAmt->getZExtValue())
13052 NumBits - ShiftCAmt->getZExtValue());
13069 SDValue LHS = N->getOperand(0);
13070 SDValue RHS = N->getOperand(1);
13071 SDValue Carry = N->getOperand(2);
13072 SDValue Cond = N->getOperand(3);
13076 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13096 ISD::LoadExtType LoadExt = Load->getExtensionType();
13110 /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13111 /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13112 /// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13118 unsigned Opcode = N->getOpcode();
13119 SDValue N0 = N->getOperand(0);
13120 EVT VT = N->getValueType(0);
13125 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13129 SDValue Op1 = N0->getOperand(1);
13130 SDValue Op2 = N0->getOperand(2);
13144 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13145 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13146 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13152 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13164 unsigned Opcode = N->getOpcode();
13165 SDValue N0 = N->getOperand(0);
13166 EVT VT = N->getValueType(0);
13171 // fold (sext c1) -> c1
13172 // fold (zext c1) -> c1
13173 // fold (aext c1) -> c1
13177 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13178 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13179 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13180 if (N0->getOpcode() == ISD::SELECT) {
13181 SDValue Op1 = N0->getOperand(1);
13182 SDValue Op2 = N0->getOperand(2);
13188 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13190 // -->
13191 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13192 // -->
13197 return DAG.getSelect(DL, VT, N0->getOperand(0),
13203 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13204 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13205 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
13213 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13230 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13240 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
13241 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13250 for (SDUse &Use : N0->uses()) {
13257 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13258 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13264 SDValue UseOp = User->getOperand(i);
13279 // Remember if this value is live-out.
13280 if (User->getOpcode() == ISD::CopyToReg)
13286 for (SDUse &Use : N->uses()) {
13287 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13309 SDValue SOp = SetCC->getOperand(j);
13313 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13316 Ops.push_back(SetCC->getOperand(2));
13317 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13323 SDValue N0 = N->getOperand(0);
13324 EVT DstVT = N->getValueType(0);
13327 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13328 N->getOpcode() == ISD::ZERO_EXTEND) &&
13345 // All legal types, and illegal non-vector types, are handled elsewhere.
13348 if (N0->getOpcode() != ISD::LOAD)
13354 !N0.hasOneUse() || !LN0->isSimple() ||
13360 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13364 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13387 SDValue BasePtr = LN0->getBasePtr();
13392 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13393 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13394 SplitSrcVT, LN0->getOriginalAlign(),
13395 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13415 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13420 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13423 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13424 EVT VT = N->getValueType(0);
13425 EVT OrigVT = N->getOperand(0).getValueType();
13430 SDValue N0 = N->getOperand(0);
13437 SDValue N1 = N0->getOperand(0);
13447 EVT MemVT = Load->getMemoryVT();
13449 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13468 Load->getChain(), Load->getBasePtr(),
13469 Load->getMemoryVT(), Load->getMemOperand());
13486 Load->getValueType(0), ExtLoad);
13501 unsigned CastOpcode = Cast->getOpcode();
13508 // obfuscated by target-specific operations after legalization. Do not create
13510 EVT VT = Cast->getValueType(0);
13514 SDValue VSel = Cast->getOperand(0);
13525 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13532 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13533 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13541 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13542 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13555 EVT MemVT = LN0->getMemoryVT();
13556 if ((LegalOperations || !LN0->isSimple() ||
13562 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13563 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13566 if (LN0->use_empty())
13571 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13588 for (SDNode *User : N0->users()) {
13589 if (User->getOpcode() == ISD::SETCC) {
13590 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13604 !cast<LoadSDNode>(N0)->isSimple()) &&
13618 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13619 LN0->getBasePtr(), N0.getValueType(),
13620 LN0->getMemOperand());
13644 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13647 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13648 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13655 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13657 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13658 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13659 ExtLoadType, Ld->isExpandingLoad());
13664 // fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13670 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13672 EVT MemoryVT = ALoad->getMemoryVT();
13676 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13681 EVT OrigVT = ALoad->getValueType(0);
13684 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13685 ALoad->getBasePtr(), ALoad->getMemOperand()));
13686 NewALoad->setExtensionType(ExtLoadType);
13697 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13698 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13700 SDValue SetCC = N->getOperand(0);
13707 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13708 EVT VT = N->getValueType(0);
13715 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13716 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
13718 unsigned ShCt = VT.getSizeInBits() - 1;
13724 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
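// --- Illustrative sketch (not part of this file): the "is non-negative" test
// built from a NOT and a shift, for i32 (ShCt = 31), assuming the usual
// arithmetic shift for signed integers.
#include <cstdint>
static int32_t  sextGtMinusOne(int32_t X)  { return ~X >> 31; } // sra (not X), 31
static uint32_t zextGtMinusOne(uint32_t X) { return ~X >> 31; } // srl (not X), 31
// ~X has its sign bit set exactly when X >= 0, so the arithmetic shift gives
// -1/0 (sext of the i1 result) and the logical shift gives 1/0 (zext of it).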
13732 SDValue N0 = N->getOperand(0);
13738 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13739 EVT VT = N->getValueType(0);
13743 // Propagate fast-math-flags.
13744 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13787 // Match a simple, non-extended load that can be converted to a
13788 // legal {z/s}ext-load.
13789 // TODO: Allow widening of an existing {z/s}ext-load?
13792 cast<LoadSDNode>(V)->isSimple() &&
13796 // Non-chain users of this value must either be the setcc in this
13797 // sequence or extends that can be folded into the new {z/s}ext-load.
13798 for (SDUse &Use : V->uses()) {
13806 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13820 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13821 // Here, T can be 1 or -1, depending on the type of the setcc and
13828 // sext(i1 1), that is, -1.
13856 SDValue N0 = N->getOperand(0);
13857 EVT VT = N->getValueType(0);
13871 // fold (sext (sext x)) -> (sext x)
13872 // fold (sext (aext x)) -> (sext x)
13876 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13877 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13883 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13886 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13895 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13896 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13914 if (N0->getFlags().hasNoSignedWrap() ||
13915 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
13932 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
13936 // fold (sext (truncate x)) -> (sextinreg x).
13974 // fold (sext (and/or/xor (load x), cst)) ->
13981 EVT MemVT = LN00->getMemoryVT();
13983 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13989 LN00->getChain(), LN00->getBasePtr(),
13990 LN00->getMemoryVT(),
13991 LN00->getMemOperand());
14009 LN00->getValueType(0), ExtLoad);
14023 // fold (sext x) -> (zext x) if the sign bit is known zero.
14033 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14042 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14051 // fold sext (not i1 X) -> add (zext i1 X), -1
14058 // Returning N0 is a form of in-visit replacement that may have
14080 /// Given an extending node with a pop-count operand, if the target does not
14081 /// support a pop-count in the narrow source type but does support it in the
14082 /// destination type, widen the pop-count to the destination type.
14084 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14085 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14088 SDValue CtPop = Extend->getOperand(0);
14092 EVT VT = Extend->getValueType(0);
14098 // zext (ctpop X) --> ctpop (zext X)
14106 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14108 EVT VT = Extend->getValueType(0);
14112 SDValue Abs = Extend->getOperand(0);
14131 SDValue N0 = N->getOperand(0);
14132 EVT VT = N->getValueType(0);
14146 // fold (zext (zext x)) -> (zext x)
14147 // fold (zext (aext x)) -> (zext x)
14151 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14155 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14156 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14161 // fold (zext (truncate x)) -> (zext x) or
14162 // (zext (truncate x)) -> (truncate x)
14182 // fold (zext (truncate x)) -> (and x, mask)
14184 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14185 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14199 if (N->getFlags().hasNonNeg()) {
14205 if (N0->getFlags().hasNoSignedWrap() ||
14206 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14230 // possibly over several sub-vectors.
14255 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14272 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14290 // fold (zext (and/or/xor (load x), cst)) ->
14299 EVT MemVT = LN00->getMemoryVT();
14301 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14307 EVT LoadResultTy = AndC->getValueType(0);
14318 LN00->getChain(), LN00->getBasePtr(),
14319 LN00->getMemoryVT(),
14320 LN00->getMemOperand());
14338 LN00->getValueType(0), ExtLoad);
14346 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14360 // Propagate fast-math-flags.
14361 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14376 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14393 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14400 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14404 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14414 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14416 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14422 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
14450 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14451 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14460 SDValue N0 = N->getOperand(0);
14461 EVT VT = N->getValueType(0);
14471 // fold (aext (aext x)) -> (aext x)
14472 // fold (aext (zext x)) -> (zext x)
14473 // fold (aext (sext x)) -> (sext x)
14478 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14482 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14483 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14484 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14490 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14491 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14508 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14520 // fold (aext (load x)) -> (aext (truncate (extload x)))
14539 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14540 LN0->getBasePtr(), N0.getValueType(),
14541 LN0->getMemOperand());
14558 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14559 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14560 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14564 ISD::LoadExtType ExtType = LN0->getExtensionType();
14565 EVT MemVT = LN0->getMemoryVT();
14568 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14569 MemVT, LN0->getMemOperand());
14578 // Propagate fast-math-flags.
14579 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14582 // aext(setcc) -> vsetcc
14583 // aext(setcc) -> truncate(vsetcc)
14584 // aext(setcc) -> aext(vsetcc)
14598 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14606 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14610 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14614 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14628 unsigned Opcode = N->getOpcode();
14629 SDValue N0 = N->getOperand(0);
14630 SDValue N1 = N->getOperand(1);
14631 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14633 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14635 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14643 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14644 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14647 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14652 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14662 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14667 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14677 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14678 SDValue N0 = N->getOperand(0);
14680 // Fold (assertalign (assertalign x, AL0), AL1) ->
14684 std::max(AL, AAN->getAlign()));
14717 unsigned Opc = N->getOpcode();
14720 SDValue N0 = N->getOperand(0);
14721 EVT VT = N->getValueType(0);
14734 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
14741 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14743 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14750 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14755 ShAmt = N1C->getZExtValue();
14756 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14761 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14766 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14767 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14768 LN->getExtensionType() != ExtType)
14771 // An AND with a constant mask is the same as a truncate + zero-extend.
14772 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14776 const APInt &Mask = AndC->getAPIntValue();
// (undocumented) reasons. Maybe the intent was to guard the AND-masking
// check below? And maybe it could be non-profitable to do the transform in
14815 ShAmt = SRL1C->getZExtValue();
14816 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
// Because an SRL must be assumed to *need* to zero-extend the high bits
14823 if (LN->getExtensionType() == ISD::SEXTLOAD)
14829 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14831 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14837 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14842 SDNode *Mask = *(SRL->user_begin());
14843 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14844 isa<ConstantSDNode>(Mask->getOperand(1))) {
14846 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14875 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
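// An illustrative instance (little-endian, shift amount below the narrow
// width):
//   (i32 (truncate (shl (i64 (load x)), 8))) --> (shl (i32 (load x)), 8)
// The truncated result depends only on the low bits of the wide load, so a
// narrow load followed by the same shift is equivalent.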
14881 ShLeftAmt = N01->getZExtValue();
14893 if (!LN0->isSimple() ||
14899 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14901 return LVTStoreBits - EVTStoreBits - ShAmt;
14913 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
14919 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14920 LN0->getPointerInfo().getWithOffset(PtrOff),
14921 LN0->getOriginalAlign(),
14922 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14924 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14925 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14926 LN0->getOriginalAlign(),
14927 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14962 SDValue N0 = N->getOperand(0);
14963 SDValue N1 = N->getOperand(1);
14964 EVT VT = N->getValueType(0);
14965 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14974 // fold (sext_in_reg c1) -> c1
14983 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14985 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14988 // fold (sext_in_reg (sext x)) -> (sext x)
14989 // fold (sext_in_reg (aext x)) -> (sext x)
15001 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15019 // fold (sext_in_reg (zext x)) -> (sext x)
15028 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15029 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15037 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15038 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15042 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15043 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15044 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15047 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15051 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15057 // fold (sext_inreg (extload x)) -> (sextload x)
15062 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15063 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15068 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15069 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15076 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15078 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15079 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15083 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15084 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15090 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15093 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15094 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15097 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15098 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15099 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15106 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15108 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15110 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15111 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15114 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15115 GN0->getIndexType(), ISD::SEXTLOAD);
15134 // -> (extract_subvector (signext iN_v to iM))
15138 EVT InnerExtVT = InnerExt->getValueType(0);
15139 SDValue Extendee = InnerExt->getOperand(0);
15157 unsigned InregOpcode = N->getOpcode();
15160 SDValue Src = N->getOperand(0);
15161 EVT VT = N->getValueType(0);
// Profitability check: our operand must be a one-use CONCAT_VECTORS.
// FIXME: the one-use check may be overly restrictive.
15187 SDValue N0 = N->getOperand(0);
15188 EVT VT = N->getValueType(0);
15194 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15213 EVT VT = N->getValueType(0);
15214 SDValue N0 = N->getOperand(0);
15305 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
15330 SDValue N0 = N->getOperand(0);
15331 EVT VT = N->getValueType(0);
15340 // fold (truncate (truncate x)) -> (truncate x)
15348 // fold (truncate c1) -> c1
15352 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
15367 // Try to narrow a truncate-of-sext_in_reg to the destination type:
15368 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
15373 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
15381 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
15384 // Fold extract-and-trunc into a narrow extract. For example:
15387 // -- becomes --
15395 N0->hasOneUse()) {
15396 EVT TrTy = N->getValueType(0);
15401 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
15421 SDValue EltNo = Src->getOperand(1);
15423 int Elt = EltNo->getAsZExtVal();
15425 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
15433 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
15447 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15471 // Attempt to pre-truncate BUILD_VECTOR sources.
15479 for (const SDValue &Op : N0->op_values()) {
15486 // trunc (splat_vector x) -> splat_vector (trunc x)
15492 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15513 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
15527 // fold (truncate (load x)) -> (smaller load x)
15528 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15537 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15539 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15540 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15547 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// Stop if more than one member is non-undef.
15593 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15600 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15606 // Simplify the operands using demanded-bits information.
15610 // fold (truncate (extract_subvector(ext x))) ->
15619 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15621 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15629 // Narrow a suitable binary operation with a non-opaque constant operand by
15630 // moving it ahead of the truncate. This is limited to pre-legalization
15643 // TODO: We already restricted this to pre-legalization, but for vectors
15645 // Target-specific changes are likely needed to avoid regressions here.
15655 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15656 // (trunc uaddo_carry(X, Y, Carry)) ->
15662 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15665 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
// Truncate the USUBSAT only if LHS is a known zero-extension, it's not
15688 SDValue Elt = N->getOperand(i);
15694 /// build_pair (load, load) -> load
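/// For example, assuming the two i32 loads are consecutive, suitably aligned,
/// and little-endian:
///   (i64 build_pair (i32 load p), (i32 load p+4)) --> (i64 load p)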
15697 assert(N->getOpcode() == ISD::BUILD_PAIR);
15709 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15710 LD1->getAddressSpace() != LD2->getAddressSpace())
15714 EVT LD1VT = LD1->getValueType(0);
15719 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15720 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15721 LD1->getPointerInfo(), LD1->getAlign());
15727 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15728 // and Lo parts; on big-endian machines it doesn't.
15735 // IEEE754-compliant FP logic, we're done.
15736 EVT VT = N->getValueType(0);
15737 SDValue N0 = N->getOperand(0);
15780 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15781 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15782 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15786 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15800 SDValue N0 = N->getOperand(0);
15801 EVT VT = N->getValueType(0);
15816 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15817 cast<BuildVectorSDNode>(N0)->isConstant())
// an fp -> int or int -> fp conversion and that the resulting operation will
15837 // (conv (conv x, t1), t2) -> (conv x, t2)
15841 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15849 V->hasOneUse());
15857 // fold (conv (load x)) -> (load (conv*)x)
15868 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15873 *LN0->getMemOperand())) {
15875 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15876 LN0->getMemOperand());
15885 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15886 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15889 // fold (bitcast (fneg x)) ->
15893 // fold (bitcast (fabs x)) ->
15899 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15937 // fold (bitconvert (fcopysign cst, x)) ->
15943 // fold (bitcast (fcopysign cst, x)) ->
15948 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15968 DAG.getConstant(OrigXWidth-VTWidth, DL,
16011 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16024 // Remove double bitcasts from shuffles - this is often a legacy of
16027 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16029 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16048 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16049 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16056 for (int M : SVN->getMask())
16058 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16070 EVT VT = N->getValueType(0);
16075 SDValue N0 = N->getOperand(0);
16086 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16089 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
16090 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16091 // the freeze through to the operands that are not guaranteed non-poison.
16092 // NOTE: we will strip poison-generating flags, so ignore them here.
16095 N0->getNumValues() != 1 || !N0->hasOneUse())
16119 for (const SDValue &Op : N0->op_values())
16128 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16130 /*Depth*/ 1))
16139 // Multiple maybe-poison ops when not allowed - bail out.
// it could create undef or poison due to its poison-generating flags.
16145 // So not finding any maybe-poison operands is fine.
16158 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16176 if (N->getOpcode() == ISD::DELETED_NODE)
16180 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16181 N0 = N->getOperand(0);
16185 SmallVector<SDValue> Ops(N0->ops());
// Special-handle ISD::UNDEF: each single one of them can be its own thing.
16196 SVN->getMask());
16199 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
16210 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
16219 // type, convert each element. This handles FP<->INT cases.
16222 for (SDValue Op : BV->op_values()) {
16231 BV->getValueType(0).getVectorNumElements());
16267 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
16283 // Returns true if floating point contraction is allowed on the FMUL-SDValue
16289 N->getFlags().hasAllowContract();
16294 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
16300 SDValue N0 = N->getOperand(0);
16301 SDValue N1 = N->getOperand(1);
16302 EVT VT = N->getValueType(0);
16309 // Floating-point multiply-add with intermediate rounding.
16314 // Floating-point multiply-add without intermediate rounding.
16326 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16329 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
16352 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16357 if (N0->use_size() > N1->use_size())
16361 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
16362 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
16367 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
16369 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
16374 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
16375 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
16377 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
16379 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
16383 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16396 SDValue FMul = TmpFMA->getOperand(2);
16407 TmpFMA = TmpFMA->getOperand(2);
16413 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
16426 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
16443 // -> (fma x, y, (fma (fpext u), (fpext v), z))
16467 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16468 // FIXME: This turns two single-precision and one double-precision
16469 // operation into two double-precision operations, which might not be
16496 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16512 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16513 // FIXME: This turns two single-precision and one double-precision
16514 // operation into two double-precision operations, which might not be
16537 SDValue N0 = N->getOperand(0);
16538 SDValue N1 = N->getOperand(1);
16539 EVT VT = N->getValueType(0);
16546 // Floating-point multiply-add with intermediate rounding.
16551 // Floating-point multiply-add without intermediate rounding.
16560 const SDNodeFlags Flags = N->getFlags();
16565 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16581 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16584 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16586 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16594 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16597 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16609 (N0->use_size() > N1->use_size())) {
16610 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16613 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16617 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16620 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16625 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
16627 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16638 // -> (fma (fpext x), (fpext y), (fneg z))
16653 // -> (fma (fneg (fpext y)), (fpext z), x)
16670 // -> (fneg (fma (fpext x), (fpext y), z))
16673 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
// -> (fneg (fma (fpext x), (fpext y), z))
16697 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16718 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16732 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// -> (fma x, y, (fma u, v, (fneg z)))
16737 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16747 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16750 N1->hasOneUse() && NoSignedZero) {
// -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16763 if (isFusedOp(N0) && N0->hasOneUse()) {
16782 // -> (fma (fpext x), (fpext y),
16784 // FIXME: This turns two single-precision and one double-precision
16785 // operation into two double-precision operations, which might not be
16808 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16810 N1->hasOneUse()) {
16830 // -> (fma (fneg (fpext y)), (fpext z),
16832 // FIXME: This turns two single-precision and one double-precision
16833 // operation into two double-precision operations, which might not be
16866 SDValue N0 = N->getOperand(0);
16867 SDValue N1 = N->getOperand(1);
16868 EVT VT = N->getValueType(0);
16871 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16881 // Floating-point multiply-add without intermediate rounding.
16887 // Floating-point multiply-add with intermediate rounding. This can result
16900 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16901 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16903 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16905 if (C->isExactlyValue(+1.0))
16908 if (C->isExactlyValue(-1.0))
16921 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16922 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16923 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16924 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16926 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16928 if (C0->isExactlyValue(+1.0))
16932 if (C0->isExactlyValue(-1.0))
16938 if (C1->isExactlyValue(+1.0))
16941 if (C1->isExactlyValue(-1.0))
16960 // FADD -> FMA combines:
16970 SDValue N0 = N->getOperand(0);
16971 SDValue N1 = N->getOperand(1);
16974 EVT VT = N->getValueType(0);
16977 SDNodeFlags Flags = N->getFlags();
16980 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16983 // fold (fadd c1, c2) -> c1 + c2
16996 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16998 if (N1C && N1C->isZero())
16999 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
17005 // fold (fadd A, (fneg B)) -> (fsub A, B)
17011 // fold (fadd (fneg A), B) -> (fsub B, A)
17021 return C && C->isExactlyValue(-2.0);
17024 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17030 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
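// Quick sanity check with A = 10.0 and B = 3.0:
//   fadd (fmul 3.0, -2.0), 10.0 == -6.0 + 10.0 == 4.0
//   fsub 10.0, (fadd 3.0, 3.0)  == 10.0 - 6.0  == 4.0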
17043 // If allowed, fold (fadd (fneg x), x) -> 0.0
17047 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17058 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17073 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17080 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17094 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17101 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17113 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17123 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17131 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17141 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17145 } // enable-unsafe-fp-math
17147 // FADD -> FMA combines:
17157 SDValue Chain = N->getOperand(0);
17158 SDValue N0 = N->getOperand(1);
17159 SDValue N1 = N->getOperand(2);
17160 EVT VT = N->getValueType(0);
17161 EVT ChainVT = N->getValueType(1);
17165 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17173 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17184 SDValue N0 = N->getOperand(0);
17185 SDValue N1 = N->getOperand(1);
17188 EVT VT = N->getValueType(0);
17191 const SDNodeFlags Flags = N->getFlags();
17194 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17197 // fold (fsub c1, c2) -> c1-c2
17209 // (fsub A, 0) -> A
17210 if (N1CFP && N1CFP->isZero()) {
17211 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17218 // (fsub x, x) -> 0.0
17223 // (fsub -0.0, N1) -> -N1
17224 if (N0CFP && N0CFP->isZero()) {
17225 if (N0CFP->isNegative() ||
17227 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17245 // X - (X + Y) -> -Y
17246 if (N0 == N1->getOperand(0))
17247 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17248 // X - (Y + X) -> -Y
17249 if (N0 == N1->getOperand(1))
17250 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17253 // fold (fsub A, (fneg B)) -> (fadd A, B)
17258 // FSUB -> FMA combines:
17269 // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17271 // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
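// A worked f32 example (23 mantissa bits), assuming the exponent neither
// overflows nor underflows: multiplying C by 8.0 adds log2(8) == 3 to the
// biased exponent field, i.e.
//   (fmul C, 8.0) --> (bitcast_to_FP (add (bitcast_to_INT C), 3 << 23))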
17283 EVT VT = N->getValueType(0);
17288 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
17291 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
17292 Pow2Op = N->getOperand(1 - ConstOpIdx);
17308 const APFloat &APF = CFP->getValueAPF();
17319 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17322 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17328 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
17367 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
17374 SDValue N0 = N->getOperand(0);
17375 SDValue N1 = N->getOperand(1);
17377 EVT VT = N->getValueType(0);
17380 const SDNodeFlags Flags = N->getFlags();
17383 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17386 // fold (fmul c1, c2) -> c1*c2
17404 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
17418 // Match a special-case: we convert X * 2.0 into fadd.
17419 // fmul (fadd X, X), C -> fmul X, 2.0 * C
17427 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
17433 // fold (fmul X, 2.0) -> (fadd X, X)
17434 if (N1CFP && N1CFP->isExactlyValue(+2.0))
17437 // fold (fmul X, -1.0) -> (fsub -0.0, X)
17438 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
17441 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
17445 // -N0 * -N1 --> N0 * N1
17461 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17462 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
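// Rationale for the first fold: when X > 0.0 the select yields -1.0 and the
// product is -X; otherwise it yields 1.0 and the product is X, which already
// equals -|X| for non-positive X (modulo NaN and signed-zero caveats).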
17477 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17478 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17495 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17499 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17507 // FMUL -> FMA combines:
17513 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17522 SDValue N0 = N->getOperand(0);
17523 SDValue N1 = N->getOperand(1);
17524 SDValue N2 = N->getOperand(2);
17527 EVT VT = N->getValueType(0);
17536 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
17539 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17557 if (N0CFP && N0CFP->isZero())
17559 if (N1CFP && N1CFP->isZero())
17564 if (N0CFP && N0CFP->isExactlyValue(1.0))
17566 if (N1CFP && N1CFP->isExactlyValue(1.0))
17569 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17575 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17577 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17586 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17596 // (fma x, -1, y) -> (fadd (fneg x), y)
17599 if (N1CFP->isExactlyValue(1.0))
17602 if (N1CFP->isExactlyValue(-1.0) &&
// fma (fneg x), K, y -> fma x, -K, y
17613 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17621 // (fma x, c, x) -> (fmul x, (c+1))
17628 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17632 DAG.getConstantFP(-1.0, DL, VT)));
17636 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17637 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17646 SDValue N0 = N->getOperand(0);
17647 SDValue N1 = N->getOperand(1);
17648 SDValue N2 = N->getOperand(2);
17649 EVT VT = N->getValueType(0);
17661 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17667 // TODO: Limit this transform based on optsize/minsize - it always creates at
17671 const SDNodeFlags Flags = N->getFlags();
17675 // Skip if current node is a reciprocal/fneg-reciprocal.
17676 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17678 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17688 EVT VT = N->getValueType(0);
17692 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17698 for (auto *U : N1->users()) {
17699 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17701 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17702 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17703 U->getFlags().hasAllowReassociation() &&
17704 U->getFlags().hasNoSignedZeros())
17709 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17723 // Dividend / Divisor -> Dividend * Reciprocal
17725 SDValue Dividend = U->getOperand(0);
17731 // In the absence of fast-math-flags, this user node is always the
17740 SDValue N0 = N->getOperand(0);
17741 SDValue N1 = N->getOperand(1);
17742 EVT VT = N->getValueType(0);
17745 SDNodeFlags Flags = N->getFlags();
17748 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17751 // fold (fdiv c1, c2) -> c1/c2
17766 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17770 const APFloat &N1APF = N1CFP->getValueAPF();
17790 // into a target-specific square root estimate instruction.
17825 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17832 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17833 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17846 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17861 // Fold X/Sqrt(X) -> Sqrt(X)
17867 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17890 SDValue N0 = N->getOperand(0);
17891 SDValue N1 = N->getOperand(1);
17892 EVT VT = N->getValueType(0);
17893 SDNodeFlags Flags = N->getFlags();
17897 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17900 // fold (frem c1, c2) -> fmod(c1,c2)
// Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, providing N1 is an integer
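// Worked example: frem(7.5, 2.0) => 7.5 - trunc(7.5 / 2.0) * 2.0
//                                == 7.5 - 3.0 * 2.0 == 1.5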
17933 SDNodeFlags Flags = N->getFlags();
17942 SDValue N0 = N->getOperand(0);
17948 // transform the fdiv, we may produce a sub-optimal estimate sequence
17954 /// copysign(x, fp_extend(y)) -> copysign(x, y)
17955 /// copysign(x, fp_round(y)) -> copysign(x, y)
17958 // Always fold no-op FP casts.
17973 SDValue N1 = N->getOperand(1);
17977 EVT N1VT = N1->getValueType(0);
17978 EVT N1Op0VT = N1->getOperand(0).getValueType();
17983 SDValue N0 = N->getOperand(0);
17984 SDValue N1 = N->getOperand(1);
17985 EVT VT = N->getValueType(0);
17988 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17992 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17993 const APFloat &V = N1C->getValueAPF();
17994 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17995 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
18006 // copysign(fabs(x), y) -> copysign(x, y)
18007 // copysign(fneg(x), y) -> copysign(x, y)
18008 // copysign(copysign(x,z), y) -> copysign(x, y)
18013 // copysign(x, abs(y)) -> abs(x)
18017 // copysign(x, copysign(y,z)) -> copysign(x, z)
18021 // copysign(x, fp_extend(y)) -> copysign(x, y)
18022 // copysign(x, fp_round(y)) -> copysign(x, y)
18032 // We only take the non-sign bits from the value operand
18041 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18050 EVT VT = N->getValueType(0);
18051 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18052 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18053 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18054 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
// pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
18059 SDNodeFlags Flags = N->getFlags();
18071 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18077 // power-of-2 fractional exponents.
18078 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18079 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18081 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18082 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18083 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18084 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18088 SDNodeFlags Flags = N->getFlags();
18104 // pow(X, 0.25) --> sqrt(sqrt(X))
18106 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18110 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18120 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18121 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18123 // FIXME: We should be able to use node-level FMF here.
18125 EVT VT = N->getValueType(0);
18131 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
18132 SDValue N0 = N->getOperand(0);
18133 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18137 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18145 SDValue N0 = N->getOperand(0);
18146 EVT VT = N->getValueType(0);
18154 // fold (sint_to_fp c1) -> c1fp
18155 // ...but only if the target supports immediate floating-point values
18170 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18174 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18177 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18193 SDValue N0 = N->getOperand(0);
18194 EVT VT = N->getValueType(0);
18202 // fold (uint_to_fp c1) -> c1fp
18203 // ...but only if the target supports immediate floating-point values
18217 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18231 SDValue N0 = N->getOperand(0);
18232 EVT VT = N->getValueType(0);
18240 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18251 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18272 SDValue N0 = N->getOperand(0);
18273 EVT VT = N->getValueType(0);
18276 // fold (fp_to_sint undef) -> undef
18280 // fold (fp_to_sint c1fp) -> c1
18288 SDValue N0 = N->getOperand(0);
18289 EVT VT = N->getValueType(0);
18292 // fold (fp_to_uint undef) -> undef
18296 // fold (fp_to_uint c1fp) -> c1
18304 SDValue N0 = N->getOperand(0);
18305 EVT VT = N->getValueType(0);
18307 // fold (lrint|llrint undef) -> undef
18308 // fold (lround|llround undef) -> undef
18312 // fold (lrint|llrint c1fp) -> c1
18313 // fold (lround|llround c1fp) -> c1
18315 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
18322 SDValue N0 = N->getOperand(0);
18323 SDValue N1 = N->getOperand(1);
18324 EVT VT = N->getValueType(0);
18327 // fold (fp_round c1fp) -> c1fp
18331 // fold (fp_round (fp_extend x)) -> x
18335 // fold (fp_round (fp_round x)) -> (fp_round x)
18337 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
18340 // Avoid folding legal fp_rounds into non-legal ones.
18348 // instructions from f32 or f64. Moreover, the first (value-preserving)
18356 // single-step fp_round we want to fold to.
18365 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
18370 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
18386 SDValue N0 = N->getOperand(0);
18387 EVT VT = N->getValueType(0);
18395 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
18398 // fold (fp_extend c1fp) -> c1fp
18402 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
18407 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
18417 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
18422 LN0->getChain(),
18423 LN0->getBasePtr(), N0.getValueType(),
18424 LN0->getMemOperand());
18441 SDValue N0 = N->getOperand(0);
18442 EVT VT = N->getValueType(0);
18444 // fold (fceil c1) -> fceil(c1)
18452 SDValue N0 = N->getOperand(0);
18453 EVT VT = N->getValueType(0);
18455 // fold (ftrunc c1) -> ftrunc(c1)
18459 // fold ftrunc (known rounded int x) -> x
18477 SDValue N0 = N->getOperand(0);
18479 // fold (ffrexp c1) -> ffrexp(c1)
18481 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18486 SDValue N0 = N->getOperand(0);
18487 EVT VT = N->getValueType(0);
18489 // fold (ffloor c1) -> ffloor(c1)
18497 SDValue N0 = N->getOperand(0);
18498 EVT VT = N->getValueType(0);
18509 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18515 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18527 SDValue N0 = N->getOperand(0);
18528 SDValue N1 = N->getOperand(1);
18529 EVT VT = N->getValueType(0);
18530 const SDNodeFlags Flags = N->getFlags();
18531 unsigned Opc = N->getOpcode();
18543 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18546 const APFloat &AF = N1CFP->getValueAPF();
18548 // minnum(X, nan) -> X
18549 // maxnum(X, nan) -> X
18550 // minimum(X, nan) -> nan
18551 // maximum(X, nan) -> nan
18553 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18558 // minnum(X, -inf) -> -inf
18559 // maxnum(X, +inf) -> +inf
18560 // minimum(X, -inf) -> -inf if nnan
18561 // maximum(X, +inf) -> +inf if nnan
18563 return N->getOperand(1);
18565 // minnum(X, +inf) -> X if nnan
18566 // maxnum(X, -inf) -> X if nnan
18567 // minimum(X, +inf) -> X
18568 // maximum(X, -inf) -> X
18570 return N->getOperand(0);
18585 SDValue N0 = N->getOperand(0);
18586 EVT VT = N->getValueType(0);
18589 // fold (fabs c1) -> fabs(c1)
18593 // fold (fabs (fabs x)) -> (fabs x)
18595 return N->getOperand(0);
18597 // fold (fabs (fneg x)) -> (fabs x)
18598 // fold (fabs (fcopysign x, y)) -> (fabs x)
18609 SDValue Chain = N->getOperand(0);
18610 SDValue N1 = N->getOperand(1);
18611 SDValue N2 = N->getOperand(2);
18615 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18617 N1->getOperand(0), N2, N->getFlags());
18628 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18629 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18630 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18631 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18632 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18639 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18640 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18641 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18642 (Cond == ISD::SETGT && C->isMaxSignedValue());
18643 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18644 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18645 (Cond == ISD::SETUGE && C->isZero()) ||
18646 (Cond == ISD::SETGE && C->isMinSignedValue());
18650 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18652 S0 = S0->getOperand(0);
18656 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18658 S1 = S1->getOperand(0);
18666 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
18667 N->getFlags());
18692 ChainHandle.getValue(), NewN1, N2, N->getFlags());
18722 // SRL constant is equal to the log2 of the AND constant. The back-end is
18731 const APInt &AndConst = AndOp1->getAsAPIntVal();
18734 Op1->getAsAPIntVal() == AndConst.logBase2()) {
// Transform (brcond (xor x, y)) -> (brcond (setcc x, y, ne))
// Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
// Returning N is a form of in-visit replacement that may invalidate
18769 SDValue Op0 = N->getOperand(0);
18770 SDValue Op1 = N->getOperand(1);
18774 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18778 Op0 = N->getOperand(0);
18779 Op1 = N->getOperand(1);
18787 // it would introduce illegal operations post-legalization as this can
18788 // result in infinite looping between converting xor->setcc here, and
18789 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
18802 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18803 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18813 CondLHS, CondRHS, CC->get(), SDLoc(N),
18820 N->getOperand(0), Simp.getOperand(2),
18822 N->getOperand(4));
18831 if (LD->isIndexed())
18833 EVT VT = LD->getMemoryVT();
18836 Ptr = LD->getBasePtr();
18838 if (ST->isIndexed())
18840 EVT VT = ST->getMemoryVT();
18843 Ptr = ST->getBasePtr();
18846 if (LD->isIndexed())
18848 EVT VT = LD->getMemoryVT();
18852 Ptr = LD->getBasePtr();
18855 if (ST->isIndexed())
18857 EVT VT = ST->getMemoryVT();
18861 Ptr = ST->getBasePtr();
18870 /// Try turning a load/store into a pre-indexed load/store when the base
18889 Ptr->hasOneUse())
18899 // Backends without true r+i pre-indexed forms may need to pass a
18912 // Try turning it into a pre-indexed load / store except when:
18927 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18928 : cast<StoreSDNode>(N)->getValue();
18935 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18950 for (SDUse &Use : BasePtr->uses()) {
18960 if (Use.getUser()->getOpcode() != ISD::ADD &&
18961 Use.getUser()->getOpcode() != ISD::SUB) {
18966 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
18987 for (SDNode *User : Ptr->users()) {
19019 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19038 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19040 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
19048 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19049 // indexed load/store and the expression that needs to be re-written.
// t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
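// For instance, when t1 comes from a PRE_INC form (t1 = base + offset1) and
// the other use computes t0 = base + offset0, all of x0, x1, y0 and y1 are 1,
// and the rewrite becomes t0 = (offset0 - offset1) + t1.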
19054 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
19055 const APInt &Offset0 = CN->getAPIntValue();
19056 const APInt &Offset1 = Offset->getAsAPIntVal();
19057 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19058 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19059 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19060 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19065 if (X0 < 0) CNV = -CNV;
19067 else CNV = CNV - Offset1;
19072 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19077 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
19096 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19111 for (SDNode *User : BasePtr->users()) {
19131 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19132 for (SDNode *UserUser : User->users())
19148 Ptr->hasOneUse())
19151 // Try turning it into a post-indexed load / store except when
19158 for (SDNode *Op : Ptr->users()) {
/// post-indexed load/store. The transformation folds the add/subtract into the
19209 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19229 /// Return the base-pointer arithmetic from an indexed \p LD.
19231 ISD::MemIndexedMode AM = LD->getAddressingMode();
19233 SDValue BP = LD->getOperand(1);
19234 SDValue Inc = LD->getOperand(2);
19240 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19244 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19245 ConstInc->getValueType(0));
19259 EVT STMemType = ST->getMemoryVT();
19282 EVT LDMemType = LD->getMemoryVT();
19283 EVT LDType = LD->getValueType(0);
19285 "Attempting to extend value of non-matching type");
19289 switch (LD->getExtensionType()) {
19309 SDValue Chain = LD->getOperand(0);
19313 Chain = Chain->getOperand(0);
19319 for (SDValue Op : Chain->ops()) {
19348 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
19350 SDValue Chain = LD->getOperand(0);
19355 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
19358 EVT LDType = LD->getValueType(0);
19359 EVT LDMemType = LD->getMemoryVT();
19360 EVT STMemType = ST->getMemoryVT();
19361 EVT STType = ST->getValue().getValueType();
19369 // no cost-benefit analysis to prove it's worth it.
19377 // analysis on big-endian platforms it seems better to bail out for now.
19387 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
19389 8 -
19403 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
19404 if (LD->isIndexed()) {
19421 // Simple case: Direct non-truncating forwarding
19423 return ReplaceLd(LD, ST->getValue(), Chain);
19426 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
19432 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
19437 // Handle some cases for big-endian that would be Offset 0 and handled for
19438 // little-endian.
19439 SDValue Val = ST->getValue();
19451 if (LD->getBasePtr().isUndef() || Offset != 0)
19474 if (Val->use_empty())
19481 SDValue Chain = LD->getChain();
19482 SDValue Ptr = LD->getBasePtr();
19488 if (LD->isSimple()) {
19489 if (N->getValueType(1) == MVT::Other) {
19491 if (!N->hasAnyUseOfValue(0)) {
19498 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19504 if (N->use_empty())
19511 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19519 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19520 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19522 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19528 Index = DAG.getUNDEF(N->getValueType(1));
19529 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19548 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19549 !LD->isAtomic()) {
19551 if (*Alignment > LD->getAlign() &&
19552 isAligned(*Alignment, LD->getSrcValueOffset())) {
19554 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19555 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19556 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19564 if (LD->isUnindexed()) {
19565 // Walk up chain skipping non-aliasing memory nodes.
19573 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19574 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19575 BetterChain, Ptr, LD->getMemOperand());
19577 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19578 LD->getValueType(0),
19579 BetterChain, Ptr, LD->getMemoryVT(),
19580 LD->getMemOperand());
19636 EVT TruncType = LS.Inst->getValueType(0);
19639 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19649 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19650 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19720 // - Start from the truncated value.
19721 // - Zero extend to the desired bit width.
19722 // - Shift left.
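// E.g. for (i8 (truncate (srl (i32 (load p)), 16))) the used bits are
// (zext 0xFF) << 16 == 0x00FF0000, i.e. only the third byte of the load.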
19724 unsigned BitWidth = Origin->getValueSizeInBits(0);
19726 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19728 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19746 LLVMContext &Ctxt = *DAG->getContext();
19752 Align Alignment = Origin->getAlign();
19766 if (!Origin->getOffset().isUndef())
19769 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19782 EVT PtrType = Origin->getBasePtr().getValueType();
19795 EVT TruncateType = Inst->getValueType(0);
19808 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19811 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19812 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19820 Offset = TySizeInBytes - Offset - getLoadedSize();
19827 /// \pre this->Inst && this->Origin are valid Instructions and this
19831 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19832 const SDValue &OldBaseAddr = Origin->getBasePtr();
19841 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19842 DAG->getConstant(Offset, DL, ArithType));
19850 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19851 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19852 Origin->getMemOperand()->getFlags());
19855 EVT FinalType = Inst->getValueType(0);
19858 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19867 if (!Inst || !Inst->hasOneUse())
19869 SDNode *User = *Inst->user_begin();
19870 if (User->getOpcode() != ISD::BITCAST)
19873 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19874 EVT ResVT = User->getValueType(0);
19876 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
19878 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
19879 User->getOperand(0)->isDivergent());
19883 // At this point, we know that we perform a cross-register-bank copy.
19885 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19888 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19894 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19895 Origin->getAddressSpace(), getAlign(),
19896 Origin->getMemOperand()->getFlags(), &IsFast) ||
19905 if (Inst->getValueType(0) != getLoadedType())
19961 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19976 EVT LoadedType = First->getLoadedType();
19979 if (LoadedType != Second->getLoadedType())
19990 if (First->getAlign() < RequiredAlignment)
19998 --GlobalLSCost.Loads;
20059 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20060 !LD->getValueType(0).isInteger())
20066 if (LD->getValueType(0).isScalableVector())
20071 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20078 for (SDUse &U : LD->uses()) {
20087 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20088 isa<ConstantSDNode>(User->getOperand(1))) {
20089 Shift = User->getConstantOperandVal(1);
20090 User = *User->user_begin();
20095 if (User->getOpcode() != ISD::TRUNCATE)
// The width of the type must be a power of 2 and greater than 8 bits.
// Moreover, if we shifted with a non-8-bit multiple, the slice
20102 unsigned Width = User->getValueSizeInBits(0);
20140 assert(SliceInst->getOpcode() == ISD::LOAD &&
20160 if (V->getOpcode() != ISD::AND ||
20161 !isa<ConstantSDNode>(V->getOperand(1)) ||
20162 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20166 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20167 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20178 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20191 NotMaskLZ -= 64-V.getValueSizeInBits();
20193 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20198 default: return Result; // All one mask, or 5-byte mask.
20209 else if (Chain->getOpcode() == ISD::TokenFactor &&
20212 if (!LD->isOperandOf(Chain.getNode()))
20231 SelectionDAG &DAG = DC->getDAG();
20246 if (DC->isTypeLegal(VT))
20255 if (St->isIndexed())
20259 if (St->getMemOperand() &&
20261 *St->getMemOperand()))
20278 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
20280 SDValue Ptr = St->getBasePtr();
20288 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
20289 St->getPointerInfo().getWithOffset(StOffset),
20290 VT, St->getOriginalAlign());
20296 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
20297 St->getPointerInfo().getWithOffset(StOffset),
20298 St->getOriginalAlign());
20307 if (!ST->isSimple())
20310 SDValue Chain = ST->getChain();
20311 SDValue Value = ST->getValue();
20312 SDValue Ptr = ST->getBasePtr();
20315 if (ST->isTruncatingStore() || VT.isVector())
20355 if (LD->getBasePtr() != Ptr ||
20356 LD->getPointerInfo().getAddrSpace() !=
20357 ST->getPointerInfo().getAddrSpace())
20363 APInt Imm = N1->getAsAPIntVal();
20373 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
20374 unsigned NewBW = NextPowerOf2(MSB - LSB);
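// Worked example, assuming LSB is the lowest changed bit rounded down to a
// byte boundary: for Imm == 0x00FF0000 this gives MSB == (24 - 1) | 7 == 23
// and LSB == 16, so NewBW == NextPowerOf2(7) == 8 and a one-byte-wide
// load/op/store sequence would suffice.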
// If we come this far, NewVT/NewBW reflect a power-of-2-sized type that is
20412 ? VTStoreSize - NewBW - ShAmt
20418 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20420 LD->getAddressSpace(), NewAlign,
20421 LD->getMemOperand()->getFlags(), &IsFast) &&
20432 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20436 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
20437 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
20438 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20443 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
20462 SDValue Value = ST->getValue();
20466 EVT VT = LD->getMemoryVT();
20467 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
20468 LD->isNonTemporal() || ST->isNonTemporal() ||
20469 LD->getPointerInfo().getAddrSpace() != 0 ||
20470 ST->getPointerInfo().getAddrSpace() != 0)
20487 *LD->getMemOperand(), &FastLD) ||
20489 *ST->getMemOperand(), &FastST) ||
20493 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
20494 LD->getBasePtr(), LD->getMemOperand());
20496 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
20497 ST->getBasePtr(), ST->getMemOperand());
20511 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
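// E.g. with c1 == 3 and c2 == 5:
//   (mul (add x, 3), 5) --> (add (mul x, 5), 15)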
20529 if (AddNode->hasOneUse() &&
20534 for (SDNode *User : ConstNode->users()) {
20538 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
20543 if (User->getOperand(0) == ConstNode)
20544 OtherOp = User->getOperand(1).getNode();
20546 OtherOp = User->getOperand(0).getNode();
20551 // User = ConstNode * A <-- visiting User. OtherOp is A.
20553 // AddNode = (A + c1) <-- MulVar is A.
20554 // = AddNode * ConstNode <-- current visiting instruction.
20566 // ... = AddNode * ConstNode <-- current visiting instruction.
20569 // User = OtherOp * ConstNode <-- visiting User.
20574 if (OtherOp->getOpcode() == ISD::ADD &&
20575 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20576 OtherOp->getOperand(0).getNode() == MulVar)
20597 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20598 Chains.push_back(StoreNodes[i].MemNode->getChain());
20608 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20611 if (MMO->getPseudoValue())
20614 if (!MMO->getValue())
20617 const Value *Obj = getUnderlyingObject(MMO->getValue());
20651 Flags = St->getMemOperand()->getFlags();
20652 AAInfo = St->getAAInfo();
20656 if (Flags != St->getMemOperand()->getFlags())
20659 AAInfo = AAInfo.concat(St->getAAInfo());
20676 SDValue Val = St->getValue();
20691 Val = DAG.getConstant(C->getAPIntValue()
20708 SDValue Val = peekThroughBitcasts(St->getValue());
20752 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20755 SDValue Val = St->getValue();
20759 StoreInt |= C->getAPIntValue()
20763 StoreInt |= C->getValueAPF()
20795 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20797 ? FirstInChain->getPointerInfo()
20798 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20799 FirstInChain->getAlign(), *Flags, AAInfo);
20806 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20809 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20811 ? FirstInChain->getPointerInfo()
20812 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20813 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20835 SDValue Val = peekThroughBitcasts(St->getValue());
20840 EVT MemVT = St->getMemoryVT();
20846 LoadVT = Ld->getMemoryVT();
20851 if (!Ld->hasNUsesOfValue(1, 0))
20855 if (!Ld->isSimple() || Ld->isIndexed())
20859 int64_t &Offset) -> bool {
20862 if (!Other->isSimple() || Other->isIndexed())
20864 // Don't mix temporal stores with non-temporal stores.
20865 if (St->isNonTemporal() != Other->isNonTemporal())
20869 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20871 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20872 : Other->getMemoryVT() != MemVT;
20882 if (LoadVT != OtherLd->getMemoryVT())
20885 if (!OtherLd->hasNUsesOfValue(1, 0))
20889 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20891 // Don't mix temporal loads with non-temporal loads.
20892 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20909 if (Other->isTruncatingStore())
20932 // |-------|-------|
20940 SDNode *RootNode = St->getChain().getNode();
20948 SDNode *RootNode) -> bool {
20951 RootCount->second.first == RootNode &&
20952 RootCount->second.second > StoreMergeDependenceLimit;
20971 RootNode = Ldn->getChain().getNode();
20975 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20977 SDNode *User = I->getUser();
20978 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
20979 for (SDUse &U2 : User->uses())
20983 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
20988 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21019 if (N->getOpcode() == ISD::TokenFactor) {
21020 for (SDValue Op : N->ops())
21031 // * Chain (Op 0) -> We have already considered these
21034 // dependency to a load, that has a non-chain dep to
21037 // of chain and non-chain deps, and we need to include
21039 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21040 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21043 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21044 // non-indexed stores). Not constant on all targets (e.g. ARM)
21046 for (const SDValue &Op : N->op_values())
21088 // non-consecutive store memory address.
21095 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21120 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21121 Align FirstStoreAlign = FirstInChain->getAlign();
21129 SDValue StoredVal = ST->getValue();
21132 IsElementZero = C->isZero();
21134 IsElementZero = C->getConstantFPValue()->isNullValue();
21156 *FirstInChain->getMemOperand(), &IsFast) &&
21169 *FirstInChain->getMemOperand(), &IsFast) &&
21186 *FirstInChain->getMemOperand(), &IsFast) &&
21204 // improved or we've dropped a non-zero value. Drop as many
21209 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21213 NumConsecutiveStores -= NumSkip;
21221 NumConsecutiveStores -= NumElem;
21231 NumConsecutiveStores -= NumElem;
21247 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21248 Align FirstStoreAlign = FirstInChain->getAlign();
21263 *FirstInChain->getMemOperand(), &IsFast) &&
21279 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21283 NumConsecutiveStores -= NumSkip;
21292 NumConsecutiveStores -= NumStoresToMerge;
21301 NumConsecutiveStores -= NumStoresToMerge;
21326 SDValue Val = peekThroughBitcasts(St->getValue());
21352 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
21361 if (Offset0 - Offset1 == ElementSizeBytes &&
21369 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21370 Align FirstStoreAlign = FirstInChain->getAlign();
21374 // non-consecutive load memory address. These variables hold the index in
21385 SDValue LoadChain = FirstLoad->getChain();
21388 if (LoadNodes[i].MemNode->getChain() != LoadChain)
21392 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21396 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
21417 *FirstInChain->getMemOperand(), &IsFastSt) &&
21420 *FirstLoad->getMemOperand(), &IsFastLd) &&
21432 *FirstInChain->getMemOperand(), &IsFastSt) &&
21435 *FirstLoad->getMemOperand(), &IsFastLd) &&
21450 *FirstInChain->getMemOperand(), &IsFastSt) &&
21453 *FirstLoad->getMemOperand(), &IsFastLd) &&
21472 Align FirstLoadAlign = FirstLoad->getAlign();
21484 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21485 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21489 NumConsecutiveStores -= NumSkip;
21498 NumConsecutiveStores -= NumElem;
21541 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21542 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21547 "Unexpected type for rotate-able load pair");
21554 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21555 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21562 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21563 FirstLoad->getPointerInfo(), JointMemOpVT,
21566 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21567 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21569 JointMemOpVT, FirstInChain->getAlign(),
21570 FirstInChain->getMemOperand()->getFlags());
21583 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21585 if (Val->use_empty())
21592 NumConsecutiveStores -= NumElem;
21605 EVT MemVT = St->getMemoryVT();
21611 // This function cannot currently deal with non-byte-sized memory sizes.
21618 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21624 // Find potential store merge candidates by searching through chain sub-DAG
21639 bool IsNonTemporalStore = St->isNonTemporal();
21641 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21646 // case that a non-mergeable store is found first, e.g., {p[-2],
21694 if (ST->isTruncatingStore()) {
21695 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21696 ST->getBasePtr(), ST->getMemoryVT(),
21697 ST->getMemOperand());
21699 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21700 ST->getMemOperand());
21705 MVT::Other, ST->getChain(), ReplStore);
21715 SDValue Value = ST->getValue();
21724 SDValue Chain = ST->getChain();
21725 SDValue Ptr = ST->getBasePtr();
21730 // the number of stores. For example, on x86-32 an f64 can be stored in one
21735 switch (CFP->getSimpleValueType(0).SimpleTy) {
21745 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21747 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21750 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21756 ST->isSimple()) ||
21758 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21761 Ptr, ST->getMemOperand());
21764 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21765 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21768 // 64-bit integer store into two 32-bit stores.
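// Worked example (hypothetical values, little-endian): f64 1.0 has the
// bit pattern 0x3FF0000000000000, so Lo = 0x00000000 is stored at Ptr
// and Hi = 0x3FF00000 at Ptr+4 -- two legal i32 stores in place of one
// illegal or expensive f64 store.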
21769 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21775 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21776 AAMDNodes AAInfo = ST->getAAInfo();
21778 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21779 ST->getOriginalAlign(), MMOFlags, AAInfo);
21782 ST->getPointerInfo().getWithOffset(4),
21783 ST->getOriginalAlign(), MMOFlags, AAInfo);
21792 // (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
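// Worked example (hypothetical types): for a v4i32 at p with i == 2,
// the fold emits "store i32 x, p + 8" (offset = 2 * 4 bytes) instead of
// loading the whole vector, inserting, and storing it back.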
21799 SDValue Value = ST->getValue();
21800 SDValue Ptr = ST->getBasePtr();
21801 SDValue Chain = ST->getChain();
21816 if (!Ld || Ld->getBasePtr() != Ptr ||
21817 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21819 Ld->getAddressSpace() != ST->getAddressSpace() ||
21825 Elt.getValueType(), ST->getAddressSpace(),
21826 ST->getAlign(), ST->getMemOperand()->getFlags(),
21831 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21837 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21839 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21844 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21845 ST->getMemOperand()->getFlags());
21850 SDValue Val = ST->getVal();
21852 EVT MemVT = ST->getMemoryVT();
21868 SDValue Chain = ST->getChain();
21869 SDValue Value = ST->getValue();
21870 SDValue Ptr = ST->getBasePtr();
21874 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21875 ST->isUnindexed()) {
21883 if (((!LegalOperations && ST->isSimple()) ||
21886 DAG, *ST->getMemOperand())) {
21888 ST->getMemOperand());
21892 // Turn 'store undef, Ptr' -> nothing.
21893 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
21897 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21898 !ST->isAtomic()) {
21900 if (*Alignment > ST->getAlign() &&
21901 isAligned(*Alignment, ST->getSrcValueOffset())) {
21903 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21904 ST->getMemoryVT(), *Alignment,
21905 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21922 if (ST->isUnindexed()) {
21923 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21930 Chain = ST->getChain();
21934 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21937 !cast<ConstantSDNode>(Value)->isOpaque())) {
21942 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21943 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21945 ST->getMemOperand());
21949 ST->getMemoryVT().getScalarSizeInBits());
21955 // Re-visit the store if anything changed and the store hasn't been merged
21957 // node back to the worklist if necessary, but we also need to re-visit
21959 if (N->getOpcode() != ISD::DELETED_NODE)
21966 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21969 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21970 ST->getMemOperand());
21975 if (!Cst->isOpaque()) {
21976 const APInt &CValue = Cst->getAPIntValue();
21982 ST->getMemoryVT(), ST->getMemOperand());
21989 // TODO: Add big-endian truncate support with test coverage.
21995 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21996 ST->isUnindexed() && ST->isSimple() &&
21997 Ld->getAddressSpace() == ST->getAddressSpace() &&
22012 if (ST->isUnindexed() && ST->isSimple() &&
22013 ST1->isUnindexed() && ST1->isSimple()) {
22014 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22015 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22016 ST->getAddressSpace() == ST1->getAddressSpace()) {
22022 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22023 !ST1->getBasePtr().isUndef() &&
22024 ST->getAddressSpace() == ST1->getAddressSpace()) {
22029 if (ST->getMemoryVT().isScalableVector() ||
22030 ST1->getMemoryVT().isScalableVector()) {
22031 if (ST1->getBasePtr() == Ptr &&
22032 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22033 ST->getMemoryVT().getStoreSize())) {
22034 CombineTo(ST1, ST1->getChain());
22044 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22046 ST1->getMemoryVT().getFixedSizeInBits())) {
22047 CombineTo(ST1, ST1->getChain());
22059 Value->hasOneUse() && ST->isUnindexed() &&
22061 ST->getMemoryVT(), LegalOperations)) {
22063 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22069 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22078 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22087 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22092 if (isa<ConstantFPSDNode>(ST->getValue())) {
22105 if (!LifetimeEnd->hasOffset())
22108 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
22109 LifetimeEnd->getOffset(), false);
22112 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22120 Chains.push_back(Chain.getOperand(--Nops));
22132 if (!ST->isSimple() || ST->isIndexed())
22134 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22142 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
22147 CombineTo(ST, ST->getChain());
22162 /// (shl (zext I to i64), 32)), addr) -->
22166 /// For pair of {i32, i32}, i64 store --> two i32 stores.
22167 /// For pair of {i32, i16}, i64 store --> two i32 stores.
22168 /// For pair of {i16, i16}, i32 store --> two i16 stores.
22169 /// For pair of {i16, i8}, i32 store --> two i16 stores.
22170 /// For pair of {i8, i8}, i16 store --> two i8 stores.
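/// Worked example (hypothetical IR, little-endian): storing
///   (or (zext i32 %lo to i64), (shl (zext i32 %hi to i64), 32))
/// to %p becomes "store i32 %lo, %p" plus "store i32 %hi, %p + 4",
/// sidestepping the i64 or/shl entirely.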
22190 if (!ST->isSimple())
22193 SDValue Val = ST->getValue();
22217 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
22220 // Lo and Hi are zero-extended from int with size less equal than 32
22242 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22243 AAMDNodes AAInfo = ST->getAAInfo();
22250 SDValue Chain = ST->getChain();
22251 SDValue Ptr = ST->getBasePtr();
22253 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22254 ST->getOriginalAlign(), MMOFlags, AAInfo);
22259 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
22260 ST->getOriginalAlign(), MMOFlags, AAInfo);
22267 // --> (vector_shuffle X, Y, NewMask)
22276 // Vec's operand 0 is using indices from 0 to N-1 and
22277 // operand 1 from N to 2N - 1, where N is the number of
22280 int ElementOffset = -1;
22304 for (SDValue Op : reverse(ArgVal->ops())) {
22305 CurrentArgOffset -= Step;
22317 if (ElementOffset == -1) {
22334 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
22337 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22339 SDValue InsertVal = N->getOperand(1);
22340 SDValue Vec = N->getOperand(0);
22346 ArrayRef<int> Mask = SVN->getMask();
22362 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
22367 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22369 SDValue InsertVal = N->getOperand(1);
22376 SDValue DestVec = N->getOperand(0);
22391 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
22426 EVT VT = N->getValueType(0);
22430 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
22435 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
22436 SDValue Scalar = N->getOperand(1);
22437 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
22439 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
22440 (InsIndex == VT.getVectorNumElements() - 1 &&
22459 SDValue Vec = Shuffle->getOperand(0);
22469 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
22470 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
22471 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22472 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22473 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
22480 -1))
22484 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
22490 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22492 Vec.getValueType(), VecLoad->getAddressSpace(),
22493 NewAlign, VecLoad->getMemOperand()->getFlags(),
22500 SDValue Ptr = ScalarLoad->getBasePtr();
22502 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22505 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22506 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22508 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22509 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22516 SDValue InVec = N->getOperand(0);
22517 SDValue InVal = N->getOperand(1);
22518 SDValue EltNo = N->getOperand(2);
22524 // Insert into out-of-bounds element is undefined.
22526 IndexC->getZExtValue() >= VT.getVectorNumElements())
22530 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22537 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22549 unsigned Elt = IndexC->getZExtValue();
22553 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22563 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22591 // vXi1 vector - we don't need to recurse.
22626 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22630 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22637 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22643 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22646 if (CurIdx->getAPIntValue().ult(NumElts)) {
22647 unsigned Idx = CurIdx->getZExtValue();
22654 CurVec = CurVec->getOperand(0);
22658 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22663 SDValue LHS = SVN->getOperand(0);
22664 SDValue RHS = SVN->getOperand(1);
22665 SmallVector<int, 16> Mask(SVN->getMask());
22685 // TODO: Do this for -1 with OR mask?
22698 // Failed to find a match in the chain - bail.
22726 assert(OriginalLoad->isSimple());
22728 EVT ResultVT = EVE->getValueType(0);
22743 Align Alignment = OriginalLoad->getAlign();
22747 int Elt = ConstEltNo->getZExtValue();
22749 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22754 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22760 OriginalLoad->getAddressSpace(), Alignment,
22761 OriginalLoad->getMemOperand()->getFlags(),
22766 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22778 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22780 OriginalLoad->getMemOperand()->getFlags(),
22781 OriginalLoad->getAAInfo());
22785 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22786 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22787 OriginalLoad->getAAInfo());
22803 SDValue Vec = ExtElt->getOperand(0);
22804 SDValue Index = ExtElt->getOperand(1);
22808 Vec->getNumValues() != 1)
22815 EVT ResVT = ExtElt->getValueType(0);
22820 // Extracting an element of a vector constant is constant-folded, so this
22832 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
22833 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
22839 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
22840 // We may need to sign- or zero-extend the result to match the same
22870 // We perform this optimization post type-legalization because
22871 // the type-legalizer often scalarizes integer-promoted vectors.
22876 // TODO: Add support for big-endian.
22880 SDValue VecOp = N->getOperand(0);
22885 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22889 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22894 EVT ScalarVT = N->getValueType(0);
22898 // TODO: deal with the cases other than everything being integer-typed.
22923 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22935 for (SDNode *User : E.Producer->users()) {
22936 switch (User->getOpcode()) {
22944 /*NumBits=*/User->getValueSizeInBits(0));
22950 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22951 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22952 // Logical right-shift means that we start extraction later,
22954 unsigned ShAmt = ShAmtC->getZExtValue();
22955 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22965 if (User->getOpcode() != ISD::BUILD_VECTOR)
22989 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
23012 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23022 SDValue VecOp = N->getOperand(0);
23023 SDValue Index = N->getOperand(1);
23024 EVT ScalarVT = N->getValueType(0);
23029 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23031 // This only really matters if the index is non-constant since other combines
23041 // (vextract (scalar_to_vector val, 0) -> val
23060 // extract_vector_elt of out-of-bounds element -> UNDEF
23063 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23066 // extract_vector_elt (build_vector x, y), 1 -> y
23074 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23104 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23119 // The vector index of the LSBs of the source depends on the endianness.
23121 unsigned ExtractIndex = IndexC->getZExtValue();
23122 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23123 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
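// E.g. on little-endian, element 0 of (v2i32 (bitcast i64:x)) is the
// low 32 bits of x, i.e. exactly (i32 (trunc x)); only that element is
// a pure truncate -- the others would also need a shift of x first.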
23133 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23142 BCTruncElt = IsLE ? 0 : Scale - 1;
23153 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23164 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23173 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23176 if (OrigElt == -1)
23185 OrigElt -= NumElts;
23212 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23213 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23214 Use->getOperand(0) == VecOp &&
23215 isa<ConstantSDNode>(Use->getOperand(1));
23218 for (SDNode *User : VecOp->users()) {
23219 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
23220 if (CstElt->getAPIntValue().ult(NumElts))
23221 DemandedElts.setBit(CstElt->getZExtValue());
23226 if (N->getOpcode() != ISD::DELETED_NODE)
23234 if (N->getOpcode() != ISD::DELETED_NODE)
23266 // extract (vector load $addr), i --> load $addr + i * size
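// E.g. extracting element 3 of a loaded v4f32 becomes an f32 load from
// $addr + 12 (3 * 4 bytes), assuming the vector load is simple.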
23269 !Index->hasPredecessor(VecOp.getNode())) {
23271 if (VecLoad && VecLoad->isSimple())
23280 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
23281 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
23282 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
23283 int Elt = IndexC->getZExtValue();
23311 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
23323 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
23332 // -> extract_vector_elt a, 0
23334 // -> extract_vector_elt a, 1
23336 // -> extract_vector_elt b, 0
23338 // -> extract_vector_elt b, 1
23351 // Make sure we found a non-volatile load and the extractelement is
23353 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
23356 // If Idx was -1 above, Elt is going to be -1, so just return undef.
23357 if (Elt == -1)
23365 // We perform this optimization post type-legalization because
23366 // the type-legalizer often scalarizes integer-promoted vectors.
23367 // Performing this optimization before may create bit-casts which
23368 // will be type-legalized to complex code sequences.
23374 unsigned NumInScalars = N->getNumOperands();
23376 EVT VT = N->getValueType(0);
23380 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
23381 // optimizations. We do not handle sign-extend because we can't fill the sign
23387 SDValue In = N->getOperand(i);
23446 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23447 SDValue Cast = N->getOperand(i);
23455 In = Cast->getOperand(0);
23457 (i * ElemRatio + (ElemRatio - 1));
23483 // (trunc (srl $1 half-width))
23484 // (trunc (srl $1 (2 * half-width))))
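// E.g. (v4i16 build_vector (trunc i64:x), (trunc (srl x, 16)),
//   (trunc (srl x, 32)), (trunc (srl x, 48))) is just (v4i16 bitcast x)
// on little-endian targets: each element already holds the matching
// 16-bit slice of x.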
23487 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23489 EVT VT = N->getValueType(0);
23508 unsigned NumInScalars = N->getNumOperands();
23520 SDValue In = PeekThroughBitcast(N->getOperand(i));
23572 EVT VT = N->getValueType(0);
23588 "Inputs must be sorted to be in non-increasing vector size order.");
23628 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23676 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23679 // total number of elements in the shuffle - if we are shuffling a wider
23685 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23709 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23713 int NumBVOps = BV->getNumOperands();
23714 int ZextElt = -1;
23716 SDValue Op = BV->getOperand(i);
23719 if (ZextElt == -1)
23724 // Bail out if there's no non-undef element.
23725 if (ZextElt == -1)
23729 // one other element. That other element must be a zero-extended scalar
23734 EVT VT = BV->getValueType(0);
23735 SDValue Zext = BV->getOperand(ZextElt);
23742 // The zero-extend must be a multiple of the source size, and we must be
23755 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23761 // that vector (mask value is number-of-elements) for the high bits.
23762 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23768 // the shuffle mask with -1.
23771 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23789 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23798 EVT VT = N->getValueType(0);
23800 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23812 unsigned NumElems = N->getNumOperands();
23815 // that element comes from. -1 stands for undef, 0 for the zero vector,
23820 SmallVector<int, 8> VectorMask(NumElems, -1);
23828 // Count the number of extract_vector_elt sources (i.e. non-constant or undef)
23832 SDValue Op = N->getOperand(i);
23847 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23858 if (ExtractIdx->getAsAPIntVal().uge(
23867 OneConstExtractIndex = ExtractIdx->getZExtValue();
23874 if (Idx == -1) { // A new source vector?
23915 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23948 // while preserving the relative order of equally-sized vectors.
23949 // Note that we keep the first "implicit" zero vector as-is.
24009 // Update the vector mask to point to the post-shuffle vectors.
24012 Vec = Shuffles.size() - 1;
24014 Vec = (Vec - 1) / 2;
24039 SmallVector<int, 8> Mask(NumElems, -1);
24046 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24055 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24086 EVT VT = N->getValueType(0);
24089 SDValue Op0 = N->getOperand(0);
24090 auto checkElem = [&](SDValue Op) -> int64_t {
24097 return C->getZExtValue();
24098 return -1;
24104 // known-minimum vector length of the result type.
24109 unsigned NumElems = N->getNumOperands();
24120 if ((Offset + i) != checkElem(N->getOperand(i)))
24137 // non-constant-zero op, UNDEF's, and to be KnownBits-based,
24143 // FIXME: support big-endian.
24147 EVT VT = N->getValueType(0);
24148 EVT OpVT = N->getOperand(0).getValueType();
24164 for (auto I : enumerate(N->ops())) {
24169 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24174 // Profitability check: don't allow non-zero constant operands.
24177 // Profitability check: there must only be a single non-zero operand,
24181 // The operand must be a zero-extension itself.
24187 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24194 // This BUILD_VECTOR must have at least one non-constant-zero operand.
24208 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24215 Scale * N->getNumOperands());
24233 for (auto I : enumerate(N->ops())) {
24244 NewOps.append(*Factor - 1, ZeroOp);
24253 EVT VT = N->getValueType(0);
24262 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
24266 // TODO: Maybe this is useful for non-splat too?
24268 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
24275 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
24279 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
24289 if (!LegalTypes && (N->getNumOperands() > 1)) {
24290 SDValue Op0 = N->getOperand(0);
24291 auto checkElem = [&](SDValue Op) -> uint64_t {
24295 return CNode->getZExtValue();
24296 return -1;
24300 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
24301 if (Offset + i != checkElem(N->getOperand(i))) {
24302 Offset = -1;
24308 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
24310 if ((Offset != -1) &&
24311 ((Offset % N->getValueType(0).getVectorNumElements()) ==
24313 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
24335 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
24345 EVT OpVT = N->getOperand(0).getValueType();
24352 EVT VT = N->getValueType(0);
24359 for (const SDValue &Op : N->ops()) {
24399 // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
24402 EVT VT = N->getValueType(0);
24407 for (const SDValue &Op : N->ops()) {
24422 assert(FirstConcat && "Concat of all-undefs found");
24425 for (const SDValue &Op : N->ops()) {
24427 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
24430 ConcatOps.append(Op->op_begin(), Op->op_end());
24440 EVT VT = N->getValueType(0);
24441 EVT OpVT = N->getOperand(0).getValueType();
24453 for (SDValue Op : N->ops()) {
24458 Mask.append((unsigned)NumOpElts, -1);
24476 Mask.append((unsigned)NumOpElts, -1);
24514 unsigned CastOpcode = N->getOperand(0).getOpcode();
24531 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
24538 for (SDValue Op : N->ops()) {
24548 EVT VT = N->getValueType(0);
24550 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24570 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
24582 EVT VT = N->getValueType(0);
24583 EVT OpVT = N->getOperand(0).getValueType();
24587 // For now, only allow simple 2-operand concatenations.
24588 if (N->getNumOperands() != 2)
24603 for (SDValue Op : N->ops()) {
24605 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24606 all_of(N->ops(), [CurSVN](SDValue Op) {
24609 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24621 AdjustedMask.reserve(SVN->getMask().size());
24622 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24623 append_range(AdjustedMask, SVN->getMask());
24639 for (SDValue Op : N->ops()) {
24645 if (Op == SVN->getOperand(0)) {
24649 if (Op == SVN->getOperand(1)) {
24663 for (auto I : zip(SVN->ops(), ShufOps)) {
24669 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24681 if (N->getNumOperands() == 1)
24682 return N->getOperand(0);
24685 EVT VT = N->getValueType(0);
24690 if (all_of(drop_begin(N->ops()),
24692 SDValue In = N->getOperand(0);
24704 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24705 SmallVector<SDValue, 4> Ops(In->ops());
24712 // concat_vectors(scalar_to_vector(scalar), undef) ->
24721 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24725 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24726 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24728 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24729 Scalar = Scalar->getOperand(0);
24756 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24760 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24769 for (const SDValue &Op : N->ops())
24778 for (const SDValue &Op : N->ops()) {
24788 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24832 N->getOperand(0).getValueType().getVectorMinNumElements();
24834 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24835 SDValue Op = N->getOperand(i);
24854 if (SingleSource.getValueType() != N->getValueType(0))
24880 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24881 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24891 SDValue BinOp = Extract->getOperand(0);
24893 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24901 SDValue Index = Extract->getOperand(1);
24902 EVT SubVT = Extract->getValueType(0);
24917 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24919 BinOp->getFlags());
24933 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24940 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24942 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24945 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24947 // with fneg in a target-specific way.
24950 if (C && C->getValueAPF().isNegZero())
24962 EVT VT = Extract->getValueType(0);
24963 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24994 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24995 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25003 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25013 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25014 // flavors, but no other 256-bit integer support. This could be extended to
25022 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25032 // half-sized operand for our new narrow binop:
25033 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25034 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25035 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25055 /// (extract_subvector (load wide vector)) --> (load narrow vector)
25057 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25061 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
25062 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
25065 // Allow targets to opt-out.
25066 EVT VT = Extract->getValueType(0);
25072 unsigned Index = Extract->getConstantOperandVal(1);
25076 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25088 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
25092 // we are extracting from something besides index 0 (little-endian).
25096 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25103 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25104 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
25106 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25109 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25125 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25128 SDValue N0 = N->getOperand(0);
25130 // Only deal with non-scalable vectors.
25131 EVT NarrowVT = N->getValueType(0);
25142 if (!WideShuffleVector->hasOneUse())
25150 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
25163 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
25165 assert((M >= -1) && (M < (2 * WideNumElts)) &&
25166 "Out-of-bounds shuffle mask?");
25182 // And which NumEltsExtracted-sized subvector of that operand is that?
25194 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
25198 NewMask.emplace_back(-1);
25243 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
25269 EVT NVT = N->getValueType(0);
25270 SDValue V = N->getOperand(0);
25271 uint64_t ExtIdx = N->getConstantOperandVal(1);
25283 // ext (ext X, C), 0 --> ext X, C
25293 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
25300 // --> extract_subvector(y,c2-c1)
25309 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
25313 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
25317 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
25380 // extract_subvec (concat V1, V2, ...), i --> Vi
25387 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
25391 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
25427 SDValue Src = V->getOperand(IdxVal);
25435 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
25463 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
25464 N->getOperand(1));
25482 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
25490 ArrayRef<int> Mask = Shuf->getMask();
25491 EVT VT = Shuf->getValueType(0);
25494 SmallVector<int, 16> Mask0(HalfNumElts, -1);
25495 SmallVector<int, 16> Mask1(HalfNumElts, -1);
25497 if (Mask[i] == -1)
25502 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
25506 Mask1[i - HalfNumElts] = M;
25517 // shuffle (concat X, undef), (concat Y, undef), Mask -->
25529 EVT VT = N->getValueType(0);
25532 SDValue N0 = N->getOperand(0);
25533 SDValue N1 = N->getOperand(1);
25535 ArrayRef<int> Mask = SVN->getMask();
25542 auto IsUndefMaskElt = [](int i) { return i == -1; };
25558 // subvector-sized copies from a concatenated vector
25569 int OpIdx = -1;
25585 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25591 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25594 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25604 // We don't fold shuffles where one side is a non-zero constant, and we don't
25605 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25606 // non-constant operands. This seems to work out reasonably well in practice.
25610 EVT VT = SVN->getValueType(0);
25612 SDValue N0 = SVN->getOperand(0);
25613 SDValue N1 = SVN->getOperand(1);
25615 if (!N0->hasOneUse())
25621 if (!N1->hasOneUse())
25638 if (SDValue Splat0 = BV0->getSplatValue())
25639 IsSplat = (Splat0 == BV1->getSplatValue());
25643 for (int M : SVN->getMask()) {
25646 int Idx = M < (int)NumElts ? M : M - NumElts;
25654 // Operand can't be combined - bail out.
25659 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25661 // generate low-quality code if the target can't reconstruct an appropriate
25687 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25696 // TODO Add support for big-endian when we have a test case.
25704 // power-of-2 extensions as they are the most likely.
25727 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25732 EVT VT = SVN->getValueType(0);
25735 // TODO Add support for big-endian when we have a test case.
25739 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25741 Mask = SVN->getMask()](unsigned Scale) {
25753 SDValue N0 = SVN->getOperand(0);
25755 // are pre-legalization.
25765 // e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25771 EVT VT = SVN->getValueType(0);
25776 // TODO: add support for big-endian when we have a test case.
25781 SmallVector<int, 16> Mask(SVN->getMask());
25787 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25801 // Element-wise(!), which of these demanded elements are known to be zero?
25803 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25815 Indice = -2; // Zeroable element.
25827 // The shuffle may be more fine-grained than we want. Widen elements first.
25846 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25847 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25854 // Analyze the shuffle mask in Scale-sized chunks.
25859 // FIXME: undef should be fine, but that results in a more-defined result.
25863 // FIXME: undef should be fine, but that results in a more-defined result.
25865 [](int Indice) { return Indice == -2; }))
25874 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25894 EVT VT = SVN->getValueType(0);
25897 // TODO Add support for big-endian when we have a test case.
25901 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25908 ArrayRef<int> Mask = SVN->getMask();
25918 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
25919 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25920 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25946 // Combine shuffles of splat-shuffles of the form:
25947 // shuffle (shuffle V, undef, splat-mask), undef, M
25948 // If splat-mask contains undef elements, we need to be careful about
25953 EVT VT = Shuf->getValueType(0);
25956 if (!Shuf->getOperand(1).isUndef())
25959 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25961 // FIXME: this can be done per-operand.
25962 if (!Shuf->isSplat()) {
25964 for (int Idx : Shuf->getMask()) {
25967 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
25972 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25974 // Which lowest demanded element is *not* known-undef?
25976 for (int Idx : Shuf->getMask()) {
25982 return DAG.getUNDEF(VT); // All undef - result is undef.
25984 SmallVector<int, 8> SplatMask(Shuf->getMask());
25988 // Otherwise, just pick the lowest demanded non-undef element.
25989 // Or sentinel undef, if we know we'd pick a known-undef element.
25990 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25992 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25993 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25994 Shuf->getOperand(1), SplatMask);
26001 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26002 return Shuf->getOperand(0);
26004 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26005 if (!Splat || !Splat->isSplat())
26008 ArrayRef<int> ShufMask = Shuf->getMask();
26009 ArrayRef<int> SplatMask = Splat->getMask();
26012 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26013 // every undef mask element in the splat-shuffle has a corresponding undef
26014 // element in the user-shuffle's mask or if the composition of mask elements
26017 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26018 // In this case it is not legal to simplify to the splat-shuffle because we
26021 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26023 // simplify to the splat-shuffle.
26024 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26027 // the splat-shuffle.
26031 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26032 SplatMask[UserMask[i]] != -1)
26037 return Shuf->getOperand(0);
26043 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26045 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26046 Splat->getOperand(0), Splat->getOperand(1),
26056 SDValue Op0 = SVN->getOperand(0);
26057 SDValue Op1 = SVN->getOperand(1);
26058 EVT VT = SVN->getValueType(0);
26080 ArrayRef<int> Mask = SVN->getMask();
26098 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
26101 if (!OuterShuf->getOperand(1).isUndef())
26103 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26104 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26107 ArrayRef<int> OuterMask = OuterShuf->getMask();
26108 ArrayRef<int> InnerMask = InnerShuf->getMask();
26111 SmallVector<int, 32> CombinedMask(NumElts, -1);
26112 int SplatIndex = -1;
26116 if (OuterMaskElt == -1)
26121 if (InnerMaskElt == -1)
26125 if (SplatIndex == -1)
26128 // Non-matching index - this is not a splat.
26134 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26135 getSplatIndex(CombinedMask) != -1) &&
26139 EVT VT = OuterShuf->getValueType(0);
26140 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26144 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26145 InnerShuf->getOperand(1), CombinedMask);
26151 /// from the first operand. Otherwise, return -1.
26154 int EltFromOp0 = -1;
26156 // Should we ignore undefs in the shuffle mask instead? The trade-off is
26162 if (EltFromOp0 != -1)
26163 return -1;
26167 return -1;
26180 ArrayRef<int> Mask = Shuf->getMask();
26182 SDValue Op0 = Shuf->getOperand(0);
26183 SDValue Op1 = Shuf->getOperand(1);
26185 if (ShufOp0Index == -1) {
26189 if (ShufOp0Index == -1)
26211 // this to a scalar-to-vector plus shuffle.
26217 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
26232 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26233 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
26236 ArrayRef<int> Mask = Shuf->getMask();
26237 ArrayRef<int> Mask0 = Shuf0->getMask();
26240 if (Mask[i] == -1)
26251 return Shuf->getOperand(0);
26255 EVT VT = N->getValueType(0);
26258 SDValue N0 = N->getOperand(0);
26259 SDValue N1 = N->getOperand(1);
26263 // Canonicalize shuffle undef, undef -> undef
26269 // Canonicalize shuffle v, v -> v, undef
26272 createUnaryMask(SVN->getMask(), NumElts));
26274 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
26283 int Idx = SVN->getMaskElt(i);
26285 Idx = -1;
26306 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
26307 int SplatIndex = SVN->getSplatIndex();
26309 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
26310 // splat (vector_bo L, R), Index -->
26319 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
26325 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
26326 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
26334 if (Idx->getAPIntValue() == SplatIndex)
26359 if (V->getOpcode() == ISD::BITCAST) {
26360 SDValue ConvInput = V->getOperand(0);
26366 if (V->getOpcode() == ISD::BUILD_VECTOR) {
26367 assert(V->getNumOperands() == NumElts &&
26372 if (!V->getOperand(i).isUndef()) {
26373 Base = V->getOperand(i);
26381 if (V->getOperand(i) != Base) {
26392 SDValue Splatted = V->getOperand(SplatIndex);
26397 if (SVN->getMaskElt(i) < 0)
26401 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
26405 if (V->getValueType(0) != VT)
26439 // only low-half elements of a concat with undef:
26440 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
26447 int Idx = SVN->getMaskElt(i);
26450 Idx -= HalfNumElts;
26465 // --> insert_subvector(lhs,rhs1,4).
26514 ArrayRef<int> Mask = SVN->getMask();
26527 // shuffle into an AND node, where all the out-of-lane elements are known zero.
26530 ArrayRef<int> Mask = SVN->getMask();
26531 SmallVector<int, 16> ClearMask(NumElts, -1);
26554 // original type, in case the value is split into two (e.g. i64->i32).
26580 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26596 EVT InnerVT = BC0->getValueType(0);
26613 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26614 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26619 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26622 SDValue SV0 = BC0->getOperand(0);
26623 SDValue SV1 = BC0->getOperand(1);
26648 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26649 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26654 SmallVectorImpl<int> &Mask) -> bool {
26657 if (OtherSVN->isSplat())
26664 int Idx = SVN->getMaskElt(i);
26672 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26678 Idx = OtherSVN->getMaskElt(Idx);
26684 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26685 : OtherSVN->getOperand(1);
26693 Mask.push_back(-1);
26715 // Last chance - see if the vector is another shuffle and if it
26718 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26720 Mask.push_back(-1);
26724 ? CurrentSVN->getOperand(0)
26725 : CurrentSVN->getOperand(1);
26727 Mask.push_back(-1);
26749 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26750 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26751 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26752 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26753 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26754 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26765 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26766 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26767 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26772 assert(N1->getOperand(0).getValueType() == VT &&
26775 SDValue SV0 = N1->getOperand(0);
26776 SDValue SV1 = N1->getOperand(1);
26785 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26788 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26789 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26794 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26795 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26796 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26801 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26802 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26805 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26806 assert(OtherSV->getOperand(0).getValueType() == VT &&
26811 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26829 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26831 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26857 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26860 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26864 // Ensure we don't increase the number of shuffles - we must merge a
26873 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26884 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26917 EVT VT = N->getValueType(0);
26926 SDValue Scalar = N->getOperand(0);
26929 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26933 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26934 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26937 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26940 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26941 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
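// E.g. scalar_to_vector (add (extractelt v4i32 V, 2), 7) can become
// shuffle (add V, splat(7)), undef, {2,-1,-1,-1}: lane 0 of the shuffle
// is (V[2] + 7), and the remaining lanes are undef anyway.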
26953 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26954 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26984 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26985 SmallVector<int, 8> Mask(SrcNumElts, -1);
26986 Mask[0] = ExtIndexC->getZExtValue();
27010 EVT VT = N->getValueType(0);
27011 SDValue N0 = N->getOperand(0);
27012 SDValue N1 = N->getOperand(1);
27013 SDValue N2 = N->getOperand(2);
27014 uint64_t InsIdx = N->getConstantOperandVal(2);
27044 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27050 // insert_subvector undef, (splat X), N2 -> splat X
27057 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27071 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27089 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27097 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27107 // -> bitcast(insert_subvector(v, s, c2))
27145 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
27166 SmallVector<SDValue, 8> Ops(N0->ops());
27179 SDValue N0 = N->getOperand(0);
27181 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
27182 if (N0->getOpcode() == ISD::FP16_TO_FP)
27183 return N0->getOperand(0);
27189 auto Op = N->getOpcode();
27192 SDValue N0 = N->getOperand(0);
27194 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
27195 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27196 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
27198 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
27199 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
27206 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
27207 N->getValueType(0), {N0});
27212 SDValue N0 = N->getOperand(0);
27214 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
27215 if (N0->getOpcode() == ISD::BF16_TO_FP)
27216 return N0->getOperand(0);
27222 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27227 SDValue N0 = N->getOperand(0);
27229 unsigned Opcode = N->getOpcode();
27231 // VECREDUCE over 1-element vector is just an extract.
27237 if (Res.getValueType() != N->getValueType(0))
27238 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
27250 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
27253 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
27254 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
27263 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
27266 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
27277 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
27285 // FSUB -> FMA combines:
27295 if (N->getOpcode() == ISD::VP_GATHER)
27299 if (N->getOpcode() == ISD::VP_SCATTER)
27303 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
27307 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
27311 // VP operations in which all vector elements are disabled - either by
27312 // determining that the mask is all false or that the EVL is 0 - can be
27315 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
27316 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
27317 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
27319 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
27323 switch (N->getOpcode()) {
27343 if (ISD::isVPBinaryOp(N->getOpcode()))
27344 return DAG.getUNDEF(N->getValueType(0));
27349 if (MemSD->writeMem())
27350 return MemSD->getChain();
27351 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
27355 if (ISD::isVPReduction(N->getOpcode()))
27356 return N->getOperand(0);
27362 SDValue Chain = N->getOperand(0);
27363 SDValue Ptr = N->getOperand(1);
27364 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27369 for (auto *U : Ptr->users()) {
27380 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27381 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27382 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
27387 for (SDUse &U : LdNode->uses()) {
27398 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27399 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27400 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27405 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
27406 StNode->getMemOperand());
27412 SDValue Chain = N->getOperand(0);
27413 SDValue Ptr = N->getOperand(1);
27414 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27418 for (auto *U : Ptr->users()) {
27429 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27430 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27436 SDValue StValue = StNode->getValue();
27438 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27439 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27440 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27446 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
27447 LdNode->getMemOperand());
27456 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
27458 EVT VT = N->getValueType(0);
27459 SDValue LHS = N->getOperand(0);
27460 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
27476 // all zeros or all ones - suitable for shuffle masking.
27486 // X & undef --> 0 (not undef). So this lane must be converted to choose
27487 // from the zero constant vector (same as if the element had all 0-bits).
27495 Bits = Cst->getAPIntValue();
27497 Bits = CstFP->getValueAPF().bitcastToAPInt();
27503 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
27544 SDValue N0 = N->getOperand(0);
27545 SDValue N1 = N->getOperand(1);
27546 unsigned Opcode = N->getOpcode();
27547 EVT VT = N->getValueType(0);
27579 // constant or undef. Avoid splatting which would over-define potentially
27582 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27589 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
27596 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27598 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27604 EVT VT = N->getValueType(0);
27607 unsigned Opcode = N->getOpcode();
27609 SDValue N0 = N->getOperand(0);
27625 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27637 EVT VT = N->getValueType(0);
27640 SDValue LHS = N->getOperand(0);
27641 SDValue RHS = N->getOperand(1);
27642 unsigned Opcode = N->getOpcode();
27643 SDNodeFlags Flags = N->getFlags();
27647 // --> shuffle (VBinOp A, B), Undef, Mask
27650 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27655 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27661 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27666 // undefined elements because that could be poison-unsafe or inhibit
27669 // load-folding or other target-specific behaviors.
27670 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27671 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27672 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27673 // binop (splat X), (splat C) --> splat (binop X, C)
27674 SDValue X = Shuf0->getOperand(0);
27677 Shuf0->getMask());
27679 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27680 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27681 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27682 // binop (splat C), (splat X) --> splat (binop C, X)
27683 SDValue X = Shuf1->getOperand(0);
27686 Shuf1->getMask());
27693 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27716 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27725 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27756 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27765 const SDNodeFlags Flags = N0->getFlags();
27773 SelectNode->setFlags(Flags);
27785 /// should return the appropriate thing (e.g. the node) back to the top-level of
27789 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27792 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27799 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27800 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27801 CmpLHS = TheSelect->getOperand(0);
27802 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27805 SDValue Cmp = TheSelect->getOperand(0);
27807 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27812 if (Zero && Zero->isZero() &&
27815 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27822 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27843 !LLD->isSimple() || !RLD->isSimple() ||
27846 LLD->isIndexed() || RLD->isIndexed() ||
27848 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27850 (LLD->getExtensionType() != RLD->getExtensionType() &&
27852 LLD->getExtensionType() != ISD::EXTLOAD &&
27853 RLD->getExtensionType() != ISD::EXTLOAD) ||
27855 // over-conservative. It would be beneficial to be able to remember
27859 LLD->getPointerInfo().getAddrSpace() != 0 ||
27860 RLD->getPointerInfo().getAddrSpace() != 0 ||
27863 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27864 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27865 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27866 LLD->getBasePtr().getValueType()))
27870 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27892 if (TheSelect->getOpcode() == ISD::SELECT) {
27898 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27901 if ((LLD->hasAnyUseOfValue(1) &&
27903 (RLD->hasAnyUseOfValue(1) &&
27908 LLD->getBasePtr().getValueType(),
27909 TheSelect->getOperand(0), LLD->getBasePtr(),
27910 RLD->getBasePtr());
27918 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27919 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27923 if ((LLD->hasAnyUseOfValue(1) &&
27925 (RLD->hasAnyUseOfValue(1) &&
27930 LLD->getBasePtr().getValueType(),
27931 TheSelect->getOperand(0),
27932 TheSelect->getOperand(1),
27933 LLD->getBasePtr(), RLD->getBasePtr(),
27934 TheSelect->getOperand(4));
27941 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27942 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27943 if (!RLD->isInvariant())
27945 if (!RLD->isDereferenceable())
27947 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27949 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27950 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27955 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27956 : LLD->getExtensionType(),
27957 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27958 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27965 // old-load value is dead now.
27981 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27982 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
27992 // (X > -1) ? A : 0
27993 // (X > 0) ? X : 0 <-- This is canonical signed max.
27998 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
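// Worked example (hypothetical i32 values): for X = -5, (sra X, 31) is
// all-ones, so (and (sra X, 31), A) == A; for X = 7 the shift is 0 and
// the result is 0 -- exactly "X < 0 ? A : 0".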
28005 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28008 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28009 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28027 unsigned ShCt = XType.getSizeInBits() - 1;
28046 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28048 SDValue N0 = N->getOperand(0);
28049 SDValue N1 = N->getOperand(1);
28050 SDValue N2 = N->getOperand(2);
28062 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28067 SDVTList OpVTs = N1->getVTList();
28070 // --> binop(select(cond, x, z), y)
28076 NewBinOp->setFlags(N1->getFlags());
28077 NewBinOp->intersectFlagsWith(N2->getFlags());
28082 // --> binop(x, select(cond, y, z))
28091 NewBinOp->setFlags(N1->getFlags());
28092 NewBinOp->intersectFlagsWith(N2->getFlags());
28103 SDValue N0 = N->getOperand(0);
28104 EVT VT = N->getValueType(0);
28105 bool IsFabs = N->getOpcode() == ISD::FABS;
28118 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28119 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
28161 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
28162 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
28167 if (!TV->hasOneUse() && !FV->hasOneUse())
28170 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
28171 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
28172 Type *FPTy = Elts[0]->getType();
28179 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
28184 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
28193 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
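// [Editor's note] FV is deliberately element 0 and TV element 1: the selected
// byte offset is 0 or EltSize, so the load reads pool[Cond ? 1 : 0], i.e.
// Cond ? TV : FV, turning the whole select_cc into a single constant-pool
// load with a computed address.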
28203 // (x ? y : y) -> y.
28217 // fold select_cc true, x, y -> x
28218 // fold select_cc false, x, y -> y
28219 return !SCCC->isZero() ? N2 : N3;
28230 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
28233 // when the condition can be materialized as an all-ones register. Any
28234 // single bit-test can be materialized as an all-ones register with
28235 // shift-left and shift-right-arith.
28236 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
28237 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
28238 SDValue AndLHS = N0->getOperand(0);
28239 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
28240 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
28242 const APInt &AndMask = ConstAndRHS->getAPIntValue();
28244 unsigned ShCt = AndMask.getBitWidth() - 1;
28250 // Now arithmetic-right-shift the tested bit across the word, so the result is either all-ones or zero.
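// [Editor's sketch; standalone model, not DAGCombiner code] The shl/sra smear
// on 32-bit scalars, K in [0, 31], relying on C++20 signed-shift semantics:
#include <cstdint>
static int32_t selectCCBitTestModel(int32_t X, unsigned K, int32_t A) {
  // select_cc seteq (and X, 1<<K), 0, 0, A: move bit K up to the sign bit,
  // then arithmetic-shift it back across the word. The smear is all-ones iff
  // bit K was set, so the result is A exactly in that case and 0 otherwise.
  int32_t Smeared = (int32_t)((uint32_t)X << (31 - K)) >> 31;
  return Smeared & A;
}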
28259 // fold select C, 16, 0 -> shl C, 4
28260 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
28261 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
28275 if (NotExtCompare && N2C->isOne())
28291 if (N2C->isOne())
28294 unsigned ShCt = N2C->getAPIntValue().logBase2();
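// [Editor's example] With N2 == 16 and N3 == 0, the shift count is
// log2(16) == 4, so: select C, 16, 0 --> shl (zext (setcc ...)), 4. The Swap
// case handles select C, 0, 16 by inverting the condition code first.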
28304 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
28305 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
28306 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
28307 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
28308 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
28309 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
28310 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
28311 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
28312 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
28320 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
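// [Editor's note] These folds are sound because ISD::CTLZ and ISD::CTTZ are
// defined to return the operand's bit width on a zero input, so the select
// merely restates that case; e.g. on i32, select_cc seteq X, 0, 32, ctlz(X)
// is ctlz(X) verbatim. The _ZERO_UNDEF variants become fully defined once the
// X == 0 case is covered, which is why they are rewritten to the plain
// opcodes.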
28339 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
28340 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
28342 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
28343 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
28344 (N1C->isZero() && CC == ISD::SETLT)) &&
28345 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
28348 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
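// [Editor's sketch; standalone model, not DAGCombiner code] The setgt form on
// 32-bit scalars; (X >> 31) is 0 when X > -1 and all-ones when X < 0:
#include <cstdint>
static int32_t selectCCSignXorModel(int32_t X, int32_t C) {
  // select_cc setgt X, -1, C, ~C == xor (ashr X, 31), C. The setlt-0 form is
  // identical except that ~C is used as the xor constant.
  return (X >> 31) ^ C;
}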
28395 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28400 if (C->isZero())
28404 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
28436 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28441 if (C->isZero())
28445 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
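// [Editor's sketch; a generic expansion, not the target hook itself] What a
// typical BuildSDIVPow2 lowering computes for X / (1 << K), K in [1, 31], on
// 32-bit scalars; BuildSREMPow2 can reuse it as X - ((X / 2^K) << K):
#include <cstdint>
static int32_t sdivPow2Model(int32_t X, unsigned K) {
  int32_t SignMask = X >> 31;                     // all-ones iff X < 0
  uint32_t Bias = (uint32_t)SignMask >> (32 - K); // 2^K - 1 iff X < 0, else 0
  return (int32_t)((uint32_t)X + Bias) >> K;      // truncating signed divide
}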
28461 // This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
28463 // `AssumeNonZero` when the caller can otherwise guarantee that `Op` is non-zero.
28465 SDValue Op, unsigned Depth,
28487 // Helper for determining whether a value is a power-of-2 constant scalar or a vector of such constants.
28491 if (C->isZero() || C->isOpaque())
28494 if (C->getAPIntValue().isPowerOf2()) {
28495 Pow2Constants.emplace_back(C->getAPIntValue());
28516 if (Depth >= DAG.MaxRecursionDepth)
28531 // log2(X << Y) -> log2(X) + Y
28533 // These forms are known non-zero: 1 << Y, and X << Y with nuw/nsw (a non-zero X cannot wrap to zero).
28534 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
28535 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
28537 Depth + 1, AssumeNonZero))
28542 // c ? X : Y -> c ? Log2(X) : Log2(Y)
28546 Depth + 1, AssumeNonZero))
28548 Depth + 1, AssumeNonZero))
28552 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
28553 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
28559 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
28562 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
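// [Editor's examples] The recursive cases above compose; assuming the operands
// are known (or assumed) non-zero:
//   Log2(1 << Y)            --> Y
//   Log2(C ? 16 : (1 << Y)) --> C ? 4 : Y
//   Log2(umin(X, 16))       --> umin(Log2(X), 4)   (log2 is monotonic)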
28570 /// Determines the LogBase2 value for a non-null input value using the
28571 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
28577 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
28582 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
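// [Editor's sketch; standalone check of the identity above] For a non-zero
// 32-bit power of two V, (31 - ctlz(V)) is its exponent: ctlz(16) == 27 and
// 31 - 27 == 4 == log2(16). A C++20 <bit> model:
#include <bit>
#include <cstdint>
static unsigned logBase2Model(uint32_t V) { // precondition: V != 0
  return 31u - (unsigned)std::countl_zero(V);
}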
28587 /// Newton's method for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
28589 /// F(X) = 1/X - A [which has a zero at X = 1/A]
28591 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form does not require additional intermediate precision]
28594 /// Result = N X_i + X_i (N - N A X_i)
28622 // Newton iterations: Est = Est + Est (N - Arg * Est)
28627 if (i == Iterations - 1) {
28636 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
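// [Editor's model; plain-float version of the recurrence, not DAG code] Each
// step roughly squares the relative error, so a hardware estimate with ~12
// good bits reaches full float precision in one or two iterations:
static float refineRecipModel(float A, float Est, unsigned Iters) {
  for (unsigned i = 0; i != Iters; ++i)
    Est = Est + Est * (1.0f - A * Est); // X_{i+1} = X_i + X_i (1 - A X_i)
  return Est; // ~ 1/A; the DAG version folds the numerator N into the last step
}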
28657 /// Newton's method for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
28659 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28661 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28670 // We now need 0.5 * Arg, which we can write as (1.5 * Arg - Arg) so that the whole sequence requires only one FP constant.
28675 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28683 // If non-reciprocal square root is requested, multiply the result by Arg.
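// [Editor's model; plain-float version, not DAG code] Note how 0.5 * A is
// formed as (1.5 * A - A), mirroring the HalfArg trick above so only the 1.5
// constant has to be materialized:
static float refineRsqrtModel(float A, float Est, unsigned Iters) {
  float HalfArg = 1.5f * A - A; // == 0.5f * A
  for (unsigned i = 0; i != Iters; ++i)
    Est = Est * (1.5f - HalfArg * Est * Est); // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
  return Est; // ~ 1/sqrt(A); multiply by A afterwards to get sqrt(A)
}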
28690 /// Newton's method for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
28692 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28694 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
28700 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28701 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28708 // E = (E * -0.5) * ((A * E) * E + -3.0)
28715 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28719 // RSQRT: LHS = (E * -0.5)
28722 // SQRT: LHS = (A * E) * -0.5
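// [Editor's derivation] This variant is the same Newton step in disguise:
//   (-0.5 X) * (A X^2 - 3.0) == X (1.5 - 0.5 A X^2)
// but factored so the subexpression (A * E) is shared between the RSQRT and
// SQRT forms of LHS shown above, saving a multiply when sqrt(A) is wanted.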
28804 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28807 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28808 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28809 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28810 ? -1 * C->getSExtValue()
28812 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28813 return {LSN->isVolatile(), LSN->isAtomic(),
28814 LSN->getBasePtr(), Offset /*base offset*/,
28815 LocationSize::precise(Size), LSN->getMemOperand()};
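// [Editor's note] Only pre-indexed forms fold the offset into the address used
// by the access itself, hence PRE_INC adds and PRE_DEC subtracts the constant;
// post-indexed forms update the pointer after the access, so their base
// offset here stays 0.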
28820 LN->getOperand(1),
28821 (LN->hasOffset()) ? LN->getOffset() : 0,
28822 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28852 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28853 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28879 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28880 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28888 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28889 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28890 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28891 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28922 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28930 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28932 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28937 if (BatchAA->isNoAlias(
28938 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28939 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28940 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28941 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
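// [Editor's worked example] For two 4-byte accesses at offsets 4 and 8 from
// the same IR value: MinOffset == 4, Overlap0 == 4 + 4 - 4 == 4 and
// Overlap1 == 4 + 8 - 4 == 8, so the sizes handed to BatchAA are extended to
// cover both accessed ranges measured from the common minimum offset.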
28949 /// Walk up chain skipping non-aliasing memory nodes, looking for aliasing nodes and adding them to the Aliases vector.
28958 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28962 unsigned Depth = 0;
28965 auto ImproveChain = [&](SDValue &C) -> bool {
28976 cast<LSBaseSDNode>(C.getNode())->isSimple();
29018 // For TokenFactor nodes, look at each operand and only continue up the chain until we reach the depth limit.
29020 // FIXME: The depth check could be made to return the last non-aliasing chain found before hitting a TokenFactor, rather than the original chain.
29023 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
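// [Editor's note] Hitting the limit abandons the walk conservatively, keeping
// the original chain rather than a partially-improved one; that is exactly
// the behavior the FIXME above would like to relax.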
29039 Chains.push_back(Chain.getOperand(--n));
29040 ++Depth;
29048 ++Depth;
29056 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
29087 // We improve the chains of all the potential candidates up-front, so the
29090 // merge can go from a partially-merged state to the desired final
29091 // fully-merged state.
29118 if (St->getMemoryVT().isZeroSized())
29121 // BaseIndexOffset assumes that offsets are fixed-size, which does not hold for scalable vectors, so bail out early.
29124 if (St->getMemoryVT().isScalableVT())
29128 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
29131 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
29132 if (Chain->getMemoryVT().isScalableVector())
29136 if (!SDValue(Chain, 0)->hasOneUse())
29139 if (!Chain->isSimple() || Chain->isIndexed())
29148 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
29156 if (I != Intervals.begin() && (--I).stop() <= Offset)
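// [Editor's note] Intervals records the byte ranges [Offset, Offset + Length)
// of stores already accepted for parallelization; the checks around
// find(Offset) ensure a new candidate overlaps neither the interval after it
// nor the one before it before it is inserted.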
29170 SDValue NewChain = STChain->getChain();
29173 StoreSDNode *S = ChainedStores[--I];
29176 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
29184 if (St->isTruncatingStore())
29185 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
29186 St->getBasePtr(), St->getMemoryVT(),
29187 St->getMemOperand());
29189 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
29190 St->getBasePtr(), St->getMemOperand());
29197 auto hasImprovedChain = [&](SDValue ST) -> bool {
29198 return ST->getOperand(0) != NewChain;
29209 for (const SDValue &Op : TF->ops())
29234 SDValue BetterChain = FindBetterChain(St, St->getChain());
29235 if (St->getChain() != BetterChain) {