Lines Matching +full:depth +full:- +full:wise in llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (each matched line keeps its line number in the file)

1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
87 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
88 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
98 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
102 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
107 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
108 cl::desc("Only use DAG-combiner alias analysis in this"
115 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
120 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
124 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
129 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
133 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
138 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
142 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
148 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
153 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
172 /// This must behave as a stack -- new nodes to process are pushed onto the
194 // BatchAA - Used for DAG load/store alias analysis.
205 for (SDNode *Node : N->users())
222 if (N->use_empty())
238 assert(N->getCombinerWorklistIndex() >= 0 &&
240 // Set to -2 to indicate that we combined the node.
241 N->setCombinerWorklistIndex(-2);
246 /// Call the node-specific routine that folds each particular type of node.
255 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
276 assert(N->getOpcode() != ISD::DELETED_NODE &&
280 // zero-use deletion strategy.
281 if (N->getOpcode() == ISD::HANDLENODE)
284 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
290 if (N->getCombinerWorklistIndex() < 0) {
291 N->setCombinerWorklistIndex(Worklist.size());
301 int WorklistIndex = N->getCombinerWorklistIndex();
302 // If not in the worklist, the index might be -1 or -2 (was combined
310 N->setCombinerWorklistIndex(-1);
414 /// Call the node-specific routine that knows how to fold each
416 /// target-specific DAG combines.
419 // Visitation implementation - Implement dag node combining for different
422 // SDValue.getNode() == 0 - No change was made
423 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
424 // otherwise - N should be replaced by the returned Operand.
690 /// Walk up chain skipping non-aliasing memory nodes,
698 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
750 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
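
// [Editorial illustration, not part of DAGCombiner.cpp] A minimal compile-time
// sketch of the identity behind this fold; the helper names are invented. The
// rewrite is plain distribution, and it stays exact under two's-complement
// wraparound, which is why it is safe on integer DAG nodes:
#include <cstdint>
constexpr uint32_t beforeFold(uint32_t x, uint32_t c1, uint32_t c2) { return (x + c1) * c2; }
constexpr uint32_t afterFold(uint32_t x, uint32_t c1, uint32_t c2) { return x * c2 + c1 * c2; }
static_assert(beforeFold(7, 3, 5) == afterFold(7, 3, 5), "(mul (add x, c1), c2)");
static_assert(beforeFold(0xFFFFFFFFu, 2, 9) == afterFold(0xFFFFFFFFu, 2, 9), "exact under wraparound");
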
841 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
845 /// single-use); if no fold is found, an empty SDValue is returned.
912 //===----------------------------------------------------------------------===//
914 //===----------------------------------------------------------------------===//
917 ((DAGCombiner*)DC)->AddToWorklist(N);
922 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
927 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
932 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
937 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
942 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
945 //===----------------------------------------------------------------------===//
947 //===----------------------------------------------------------------------===//
953 // dead. Make sure to re-visit them and recursively delete dead nodes.
954 for (const SDValue &Op : N->ops())
958 if (Op->hasOneUse() || Op->getNumValues() > 1)
1010 /// Return true if this is a SetCC-equivalent operation with only one use.
1015 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1052 return !(Const->isOpaque() && NoOpaques);
1056 for (const SDValue &Op : N->op_values()) {
1060 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1061 (Const->isOpaque() && NoOpaques))
1067 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed with
1079 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1080 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1091 // (load/store (add, (add, x, offset1), offset2)) ->
1094 // (load/store (add, (add, x, y), offset2)) ->
1116 ScalableOffset = -ScalableOffset;
1117 if (all_of(N->users(), [&](SDNode *Node) {
1119 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1123 EVT VT = LoadStore->getMemoryVT();
1124 unsigned AS = LoadStore->getAddressSpace();
1141 const APInt &C2APIntVal = C2->getAPIntValue();
1149 const APInt &C1APIntVal = C1->getAPIntValue();
1155 for (SDNode *Node : N->users()) {
1163 EVT VT = LoadStore->getMemoryVT();
1164 unsigned AS = LoadStore->getAddressSpace();
1177 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1180 for (SDNode *Node : N->users()) {
1190 EVT VT = LoadStore->getMemoryVT();
1191 unsigned AS = LoadStore->getAddressSpace();
1217 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1222 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1225 N0->getFlags().hasDisjoint());
1231 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1240 // (N00 & N01) & N00 --> N00 & N01
1241 // (N00 & N01) & N01 --> N00 & N01
1242 // (N00 | N01) | N00 --> N00 | N01
1243 // (N00 | N01) | N01 --> N00 | N01
1248 // (N00 ^ N01) ^ N00 --> N01
1251 // (N00 ^ N01) ^ N01 --> N00
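
// [Editorial illustration, not part of DAGCombiner.cpp] The and/or cases rely
// on absorption (x & x == x, x | x == x); the xor cases rely on xor being its
// own inverse, so the repeated operand cancels. A quick compile-time check:
static_assert(((0xA5u & 0x3Cu) & 0xA5u) == (0xA5u & 0x3Cu), "(N00 & N01) & N00 --> N00 & N01");
static_assert(((0xA5u ^ 0x3Cu) ^ 0xA5u) == 0x3Cu, "(N00 ^ N01) ^ N00 --> N01");
static_assert(((0xA5u ^ 0x3Cu) ^ 0x3Cu) == 0xA5u, "(N00 ^ N01) ^ N01 --> N00");
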
1285 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1286 N01->getOpcode() == ISD::SETCC) {
1287 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1288 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1289 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1311 // Floating-point reassociation is not allowed without loose FP math.
1324 // Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1332 N0->hasOneUse() && N1->hasOneUse() &&
1345 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1347 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1349 dbgs() << " and " << NumTo - 1 << " other values\n");
1352 N->getValueType(i) == To[i].getValueType()) &&
1368 if (N->use_empty())
1429 EVT VT = Load->getValueType(0);
1432 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1447 EVT MemVT = LD->getMemoryVT();
1449 : LD->getExtensionType();
1452 LD->getChain(), LD->getBasePtr(),
1453 MemVT, LD->getMemOperand());
1552 Replace0 &= !N0->hasOneUse();
1553 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1560 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1649 // fold (aext (aext x)) -> (aext x)
1650 // fold (aext (zext x)) -> (zext x)
1651 // fold (aext (sext x)) -> (sext x)
1684 EVT MemVT = LD->getMemoryVT();
1686 : LD->getExtensionType();
1688 LD->getChain(), LD->getBasePtr(),
1689 MemVT, LD->getMemOperand());
1692 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1713 if (!N->use_empty())
1723 if (N->use_empty()) {
1724 for (const SDValue &ChildN : N->op_values())
1736 //===----------------------------------------------------------------------===//
1738 //===----------------------------------------------------------------------===//
1773 // If this combine is running after legalizing the DAG, re-legalize any
1786 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1792 for (const SDValue &ChildN : N->op_values())
1813 assert(N->getOpcode() != ISD::DELETED_NODE &&
1819 if (N->getNumValues() == RV->getNumValues())
1822 assert(N->getValueType(0) == RV.getValueType() &&
1823 N->getNumValues() == 1 && "Type mismatch");
1829 // out), because re-visiting the EntryToken and its users will not uncover
1848 // clang-format off
1849 switch (N->getOpcode()) {
2015 // clang-format on
2027 // If nothing happened, try a target-specific DAG combine.
2029 assert(N->getOpcode() != ISD::DELETED_NODE &&
2032 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2033 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2045 switch (N->getOpcode()) {
2074 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2075 SDValue N0 = N->getOperand(0);
2076 SDValue N1 = N->getOperand(1);
2081 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2082 N->getFlags());
2094 if (unsigned NumOps = N->getNumOperands()) {
2095 if (N->getOperand(0).getValueType() == MVT::Other)
2096 return N->getOperand(0);
2097 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2098 return N->getOperand(NumOps-1);
2099 for (unsigned i = 1; i < NumOps-1; ++i)
2100 if (N->getOperand(i).getValueType() == MVT::Other)
2101 return N->getOperand(i);
2107 SDValue Operand = N->getOperand(0);
2122 if (N->getNumOperands() == 2) {
2123 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2124 return N->getOperand(0);
2125 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2126 return N->getOperand(1);
2134 if (N->getNumOperands() > TokenFactorInlineLimit)
2140 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2141 AddToWorklist(*(N->user_begin()));
2168 for (const SDValue &Op : TF->op_values()) {
2196 // Re-visit inlined Token Factors, to clean them up in case they have been
2202 // by walking up chains breadth-first, stopping when we've seen
2230 // Re-mark worklist from OrigOpNumber to OpNumber
2238 NumLeftToConsider--;
2255 switch (CurNode->getOpcode()) {
2264 for (const SDValue &Op : CurNode->op_values())
2271 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2275 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2278 OpWorkCount[CurOpNumber]--;
2280 NumLeftToConsider--;
2318 SmallVector<SDValue, 8> Ops(N->ops());
2320 } while (!N->use_empty());
2329 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2332 // isTruncateOf - If N is a truncate of some other value, return true, record
2338 if (N->getOpcode() == ISD::TRUNCATE) {
2339 Op = N->getOperand(0);
2341 if (N->getFlags().hasNoUnsignedWrap())
2363 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2365 VT = LD->getMemoryVT();
2366 AS = LD->getAddressSpace();
2368 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2370 VT = ST->getMemoryVT();
2371 AS = ST->getAddressSpace();
2373 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2375 VT = LD->getMemoryVT();
2376 AS = LD->getAddressSpace();
2378 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2380 VT = ST->getMemoryVT();
2381 AS = ST->getAddressSpace();
2387 if (N->getOpcode() == ISD::ADD) {
2389 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2391 // [reg +/- imm]
2392 AM.BaseOffs = Offset->getSExtValue();
2394 // [reg +/- reg]
2396 } else if (N->getOpcode() == ISD::SUB) {
2398 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2400 // [reg +/- imm]
2401 AM.BaseOffs = -Offset->getSExtValue();
2403 // [reg +/- reg]
2414 /// with an identity constant. Codegen improves if we re-use the variable
2420 // is only valid as operand 1 of a non-commutative binop.
2421 SDValue N0 = N->getOperand(0);
2422 SDValue N1 = N->getOperand(1);
2435 unsigned Opcode = N->getOpcode();
2436 EVT VT = N->getValueType(0);
2442 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2444 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2446 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2449 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2450 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2452 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2460 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2464 auto BinOpcode = BO->getOpcode();
2465 EVT VT = BO->getValueType(0);
2470 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2479 SDValue Sel = BO->getOperand(0);
2482 Sel = BO->getOperand(1);
2510 // The exception is "and" and "or" with either 0 or -1 in which case we can
2512 // and (select Cond, 0, -1), X --> select Cond, 0, X
2513 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2519 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2542 // We have a select-of-constants followed by a binary operator with a
2544 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2558 SelectOp->setFlags(BO->getFlags());
2564 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2570 bool IsAdd = N->getOpcode() == ISD::ADD;
2571 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2572 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2588 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2589 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2592 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2593 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2597 // Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2599 SDValue N0 = N->getOperand(0);
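
// [Editorial illustration, not part of DAGCombiner.cpp] A sketch of the
// identity this matcher looks for (helper name invented). Since
// a + b == 2*(a | b) - (a ^ b), the ceiling average can be computed as
// (a | b) - ((a ^ b) >> 1) without ever forming the possibly-overflowing a + b:
#include <cstdint>
constexpr uint8_t avgceilSketch(uint8_t a, uint8_t b) {
  return (a | b) - ((a ^ b) >> 1);
}
static_assert(avgceilSketch(1, 2) == 2, "ceil(3 / 2) == 2");
static_assert(avgceilSketch(255, 254) == 255, "no intermediate overflow");
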
2616 /// Try to fold a 'not' of a shifted sign-bit, together with an add/sub of a constant operand, into
2620 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2625 bool IsAdd = N->getOpcode() == ISD::ADD;
2626 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2627 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2637 // The shift must be moving the sign bit to the least-significant-bit.
2641 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2645 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2646 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2668 SDValue N0 = N->getOperand(0);
2669 SDValue N1 = N->getOperand(1);
2673 // fold (add x, undef) -> undef
2679 // fold (add c1, c2) -> c1+c2
2696 // fold (add x, 0) -> x, vector edition
2701 // fold (add x, 0) -> x
2709 // fold ((A-c1)+c2) -> (A+(c2-c1))
2713 // fold ((c1-A)+c2) -> (c1+c2)-A
2718 // add (sext i1 X), 1 -> zext (not i1 X)
2720 // add (zext i1 X), -1 -> sext (not i1 X)
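
// [Editorial illustration, not part of DAGCombiner.cpp] For an i1 value, sext
// produces 0/-1 and zext produces 0/1, so both folds are just the two-entry
// truth table below (modeled with bool; helper names invented):
constexpr int sextI1(bool b) { return b ? -1 : 0; }
constexpr int zextI1(bool b) { return b ? 1 : 0; }
static_assert(sextI1(true) + 1 == zextI1(!true) && sextI1(false) + 1 == zextI1(!false),
              "add (sext i1 X), 1 -> zext (not i1 X)");
static_assert(zextI1(true) - 1 == sextI1(!true) && zextI1(false) - 1 == sextI1(!false),
              "add (zext i1 X), -1 -> sext (not i1 X)");
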
2734 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2736 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2749 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2752 // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
2754 // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
2780 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2788 // fold ((0-A) + B) -> B-A
2792 // fold (A + (0-B)) -> A-B
2796 // fold (A+(B-A)) -> B
2800 // fold ((B-A)+A) -> B
2804 // fold ((A-B)+(C-A)) -> (C-B)
2809 // fold ((A-B)+(B-C)) -> (A-C)
2814 // fold (A+(B-(A+C))) to (B-C)
2815 // fold (A+(B-(C+A))) to (B-C)
2819 // fold (A+((B-A)+or-C)) to (B+or-C)
2825 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2833 // fold (add (umax X, C), -C) --> (usubsat X, C)
2837 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
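
// [Editorial illustration, not part of DAGCombiner.cpp] umax followed by
// subtracting the same constant clamps at zero, which is exactly unsigned
// saturating subtraction (helper name invented):
#include <algorithm>
#include <cstdint>
constexpr uint8_t usubsatSketch(uint8_t x, uint8_t c) { return x > c ? x - c : 0; }
static_assert(uint8_t(std::max<uint8_t>(10, 40) - 40) == usubsatSketch(10, 40), "clamps to 0");
static_assert(uint8_t(std::max<uint8_t>(100, 40) - 40) == usubsatSketch(100, 40), "plain subtract");
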
2849 // fold (add (xor a, -1), 1) -> (sub 0, a)
2854 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2873 // sub y, (xor x, -1)
2877 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2878 !N->getFlags().hasNoSignedWrap()))) {
2884 // (x - y) + -1 -> add (xor y, -1), x
2891 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2899 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2903 if (N->getFlags().hasNoUnsignedWrap() &&
2904 N0->getFlags().hasNoUnsignedWrap() &&
2905 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2907 if (N->getFlags().hasNoSignedWrap() &&
2908 N0->getFlags().hasNoSignedWrap() &&
2909 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2916 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2924 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2930 if (N->getFlags().hasNoUnsignedWrap() &&
2931 N0->getFlags().hasNoUnsignedWrap() &&
2932 OMul->getFlags().hasNoUnsignedWrap() &&
2933 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2935 if (N->getFlags().hasNoSignedWrap() &&
2936 N0->getFlags().hasNoSignedWrap() &&
2937 OMul->getFlags().hasNoSignedWrap() &&
2938 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2946 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2962 SDValue N0 = N->getOperand(0);
2981 SDValue N0 = N->getOperand(0);
2982 SDValue N1 = N->getOperand(1);
2999 // fold (a+b) -> (a|b) iff a and b share no bits.
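
// [Editorial illustration, not part of DAGCombiner.cpp] With no common bits
// set there are no carries, so addition and bitwise-or agree; on the DAG side
// this precondition is what SelectionDAG::haveNoCommonBitsSet checks:
static_assert((0xF0u & 0x0Fu) == 0 && (0xF0u + 0x0Fu) == (0xF0u | 0x0Fu),
              "disjoint bits: a + b == a | b");
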
3006 const APInt &C0 = N0->getConstantOperandAPInt(0);
3007 const APInt &C1 = N1->getConstantOperandAPInt(0);
3011 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3015 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3016 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3024 const APInt &C0 = N0->getConstantOperandAPInt(0);
3025 const APInt &C1 = N1->getConstantOperandAPInt(0);
3034 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3035 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3045 unsigned Opcode = N->getOpcode();
3046 SDValue N0 = N->getOperand(0);
3047 SDValue N1 = N->getOperand(1);
3052 // fold (add_sat x, undef) -> -1
3056 // fold (add_sat c1, c2) -> c3
3070 // fold (add_sat x, 0) -> x, vector edition
3075 // fold (add_sat x, 0) -> x
3120 EVT VT = V->getValueType(0);
3136 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3143 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3157 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3158 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3168 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3180 // sub y, (xor x, -1)
3184 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3185 !N0->getFlags().hasNoSignedWrap()))) {
3191 // Hoist one-use subtraction by non-opaque constant:
3192 // (x - C) + y -> (x + y) - C
3193 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3198 // Hoist one-use subtraction from non-opaque constant:
3199 // (C - x) + y -> (y - x) + C
3206 // add (mul x, C), x -> mul x, C+1
3216 // rather than 'add 0/-1' (the zext should get folded).
3217 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3225 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3228 if (TN->getVT() == MVT::i1) {
3235 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3238 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3241 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3252 SDValue N0 = N->getOperand(0);
3253 SDValue N1 = N->getOperand(1);
3258 if (!N->hasAnyUseOfValue(1))
3266 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3268 // fold (addc x, 0) -> x + no carry out
3305 SDValue N0 = N->getOperand(0);
3306 SDValue N1 = N->getOperand(1);
3308 bool IsSigned = (ISD::SADDO == N->getOpcode());
3310 EVT CarryVT = N->getValueType(1);
3314 if (!N->hasAnyUseOfValue(1))
3321 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3323 // fold (addo x, 0) -> x + no carry out
3333 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3335 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3338 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3340 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3343 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3361 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3367 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3371 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3374 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3381 SDValue N0 = N->getOperand(0);
3382 SDValue N1 = N->getOperand(1);
3383 SDValue CarryIn = N->getOperand(2);
3389 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3392 // fold (adde x, y, false) -> (addc x, y)
3394 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3400 SDValue N0 = N->getOperand(0);
3401 SDValue N1 = N->getOperand(1);
3402 SDValue CarryIn = N->getOperand(2);
3409 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3411 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3414 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3415 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3418 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3442 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3491 EVT VT = Carry0->getValueType(1);
3502 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3504 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3579 EVT CarryOutType = N->getValueType(0);
3586 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3611 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3612 Carry1->getValueType(0));
3614 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3620 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3624 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3631 if (N->getOpcode() == ISD::AND)
3638 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3643 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3646 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3650 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3656 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3657 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3678 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3681 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3689 SDValue N0 = N->getOperand(0);
3690 SDValue N1 = N->getOperand(1);
3691 SDValue CarryIn = N->getOperand(2);
3698 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3700 // fold (saddo_carry x, y, false) -> (saddo x, y)
3703 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3704 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3727 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3744 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3747 if (N->getOpcode() != ISD::SUB ||
3751 EVT SubVT = N->getValueType(0);
3752 SDValue Op0 = N->getOperand(0);
3753 SDValue Op1 = N->getOperand(1);
3755 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3775 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3801 // if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3802 // -->
3814 // and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3816 // -->
3822 SDValue N0 = N->getOperand(0);
3842 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3851 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3877 SDValue N0 = N->getOperand(0);
3878 SDValue N1 = N->getOperand(1);
3884 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3885 return N->getOperand(0);
3892 // fold (sub x, x) -> 0
3897 // fold (sub c1, c2) -> c3
3906 // fold (sub x, 0) -> x, vector edition
3914 // fold (sub x, c) -> (add x, -c)
3917 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3920 // Right-shifting everything out but the sign bit followed by negation is
3922 // -(X >>u 31) -> (X >>s 31)
3923 // -(X >>s 31) -> (X >>u 31)
3924 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3926 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3927 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
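
// [Editorial illustration, not part of DAGCombiner.cpp] Shifting out all but
// the sign bit yields 0/1 (srl) or 0/-1 (sra); negation maps one onto the
// other. Sketch for 32 bits, assuming two's-complement conversions and
// arithmetic right shift on signed values (guaranteed since C++20):
#include <cstdint>
constexpr uint32_t srl31(uint32_t x) { return x >> 31; }
constexpr uint32_t sra31(uint32_t x) { return uint32_t(int32_t(x) >> 31); }
static_assert(uint32_t(-srl31(0x80000000u)) == sra31(0x80000000u), "-(X >>u 31) == X >>s 31");
static_assert(uint32_t(-sra31(0x80000000u)) == srl31(0x80000000u), "-(X >>s 31) == X >>u 31");
static_assert(srl31(5u) == 0 && sra31(5u) == 0, "both zero for non-negative X");
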
3933 // 0 - X --> 0 if the sub is NUW.
3934 if (N->getFlags().hasNoUnsignedWrap())
3940 if (N->getFlags().hasNoSignedWrap())
3943 // 0 - X --> X if X is 0 or the minimum signed value.
3947 // Convert 0 - abs(x).
3954 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
3956 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
3965 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
3970 // Fold neg(splat(neg(x))) -> splat(x)
3979 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3983 // fold (A - (0-B)) -> A+B
3987 // fold A-(A-B) -> B
3991 // fold (A+B)-A -> B
3995 // fold (A+B)-B -> A
3999 // fold (A+C1)-C2 -> A+(C1-C2)
4006 // fold C2-(A+C1) -> (C2-C1)-A
4013 // fold (A-C1)-C2 -> A-(C1+C2)
4020 // fold (c1-A)-c2 -> (c1-c2)-A
4029 // fold ((A+(B+C))-B) -> A+C
4033 // fold ((A+(B-C))-B) -> A-C
4037 // fold ((A-(B-C))-C) -> A-B
4041 // fold (A-(B-C)) -> A+(C-B)
4046 // A - (A & B) -> A & (~B)
4051 // fold (A - (-B * C)) -> (A + (B * C))
4078 // (A - B) - 1 -> add (xor B, -1), A
4083 // sub y, (xor x, -1)
4091 // Hoist one-use addition by non-opaque constant:
4092 // (x + C) - y -> (x - y) + C
4099 // y - (x + C) -> (y - x) - C
4105 // (x - C) - y -> (x - y) - C
4106 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4112 // (C - x) - y -> C - (x + y)
4119 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4121 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4130 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4132 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
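
// [Editorial illustration, not part of DAGCombiner.cpp] The matched pattern is
// the classic branchless abs: B is a mask of copies of the sign bit, and
// (A ^ B) - B conditionally negates A. Sketch assuming arithmetic right shift
// on signed types (guaranteed since C++20):
#include <cstdint>
constexpr int32_t absSketch(int32_t a) {
  int32_t b = a >> 31;   // B = sra(A, size(A)-1): 0 for a >= 0, -1 for a < 0
  return (a ^ b) - b;    // a < 0: flip all bits, then add 1
}
static_assert(absSketch(-42) == 42 && absSketch(42) == 42 && absSketch(0) == 0, "abs A");
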
4139 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4141 if (GA->getGlobal() == GB->getGlobal())
4142 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4146 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4149 if (TN->getVT() == MVT::i1) {
4156 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4159 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4162 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4164 APInt NewStep = -N1.getConstantOperandAPInt(0);
4170 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4174 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4182 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4185 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4189 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4192 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4196 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4208 // sub C0, X --> xor X, C0
4210 if (!C0->isOpaque()) {
4211 const APInt &C0Val = C0->getAPIntValue();
4213 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4218 // smax(a,b) - smin(a,b) --> abds(a,b)
4224 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4230 // umax(a,b) - umin(a,b) --> abdu(a,b)
4236 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
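
// [Editorial illustration, not part of DAGCombiner.cpp] max minus min of the
// same pair is their absolute difference, which is what the ABDS/ABDU nodes
// compute:
#include <algorithm>
static_assert(std::max(3, -7) - std::min(3, -7) == 10, "abds(3, -7) == 10");
static_assert(std::max(3u, 7u) - std::min(3u, 7u) == 4u, "abdu(3, 7) == 4");
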
4246 unsigned Opcode = N->getOpcode();
4247 SDValue N0 = N->getOperand(0);
4248 SDValue N1 = N->getOperand(1);
4253 // fold (sub_sat x, undef) -> 0
4257 // fold (sub_sat x, x) -> 0
4261 // fold (sub_sat c1, c2) -> c3
4270 // fold (sub_sat x, 0) -> x, vector edition
4275 // fold (sub_sat x, 0) -> x
4287 SDValue N0 = N->getOperand(0);
4288 SDValue N1 = N->getOperand(1);
4293 if (!N->hasAnyUseOfValue(1))
4297 // fold (subc x, x) -> 0 + no borrow
4302 // fold (subc x, 0) -> x + no borrow
4306 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4315 SDValue N0 = N->getOperand(0);
4316 SDValue N1 = N->getOperand(1);
4318 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4320 EVT CarryVT = N->getValueType(1);
4324 if (!N->hasAnyUseOfValue(1))
4328 // fold (subo x, x) -> 0 + no borrow
4333 // fold (subo x, c) -> (addo x, -c)
4335 if (IsSigned && !N1C->isMinSignedValue())
4336 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4337 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4339 // fold (subo x, 0) -> x + no borrow
4348 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4357 SDValue N0 = N->getOperand(0);
4358 SDValue N1 = N->getOperand(1);
4359 SDValue CarryIn = N->getOperand(2);
4361 // fold (sube x, y, false) -> (subc x, y)
4363 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4369 SDValue N0 = N->getOperand(0);
4370 SDValue N1 = N->getOperand(1);
4371 SDValue CarryIn = N->getOperand(2);
4373 // fold (usubo_carry x, y, false) -> (usubo x, y)
4376 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4377 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4384 SDValue N0 = N->getOperand(0);
4385 SDValue N1 = N->getOperand(1);
4386 SDValue CarryIn = N->getOperand(2);
4388 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4391 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4392 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4401 SDValue N0 = N->getOperand(0);
4402 SDValue N1 = N->getOperand(1);
4403 SDValue Scale = N->getOperand(2);
4406 // fold (mulfix x, undef, scale) -> 0
4413 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4415 // fold (mulfix x, 0, scale) -> 0
4423 SDValue N0 = N->getOperand(0);
4424 SDValue N1 = N->getOperand(1);
4431 // fold (mul x, undef) -> 0
4435 // fold (mul c1, c2) -> c1*c2
4461 ConstValue1 = N1->getAsAPIntVal();
4462 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4466 // fold (mul x, 0) -> 0
4470 // fold (mul x, 1) -> x
4478 // fold (mul x, -1) -> 0-x
4482 // fold (mul x, (1 << c)) -> x << c
4492 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4494 unsigned Log2Val = (-ConstValue1).logBase2();
4497 // single-use add), we should put the negate there.
4505 // hi result is in use in case we hit this mid-legalization.
4512 if (LoHi->hasAnyUseOfValue(1))
4515 if (LoHi->hasAnyUseOfValue(1))
4522 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4523 // mul x, (2^N + 1) --> add (shl x, N), x
4524 // mul x, (2^N - 1) --> sub (shl x, N), x
4525 // Examples: x * 33 --> (x << 5) + x
4526 // x * 15 --> (x << 4) - x
4527 // x * -33 --> -((x << 5) + x)
4528 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4529 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4530 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4531 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4532 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4533 // x * 0xf800 --> (x << 16) - (x << 11)
4534 // x * -0x8800 --> -((x << 15) + (x << 11))
4535 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4546 if ((MulC - 1).isPowerOf2())
4553 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4556 "multiply-by-constant generated out of bounds shift");
4570 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4577 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4597 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4611 const APInt &C1 = NC1->getAPIntValue();
4624 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4629 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4634 // Fold ((mul x, 0/undef) -> 0,
4635 // (mul x, 1) -> x)
4636 // -> and(x, mask)
4643 if (!V || V->isZero()) {
4648 return V->isOne();
4667 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4670 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4677 // Simplify the operands using demanded-bits information.
4688 EVT NodeType = Node->getValueType(0);
4705 if (Node->use_empty())
4708 unsigned Opcode = Node->getOpcode();
4712 // DivMod can still be lowered for non-legal types by using lib calls.
4713 EVT VT = Node->getValueType(0);
4738 SDValue Op0 = Node->getOperand(0);
4739 SDValue Op1 = Node->getOperand(1);
4741 for (SDNode *User : Op0->users()) {
4742 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4743 User->use_empty())
4746 // otherwise, the DIVREM may get target-legalized into something
4747 // target-specific that we won't be able to recognize.
4748 unsigned UserOpc = User->getOpcode();
4750 User->getOperand(0) == Op0 &&
4751 User->getOperand(1) == Op1) {
4773 SDValue N0 = N->getOperand(0);
4774 SDValue N1 = N->getOperand(1);
4775 EVT VT = N->getValueType(0);
4778 unsigned Opc = N->getOpcode();
4782 // X / undef -> undef
4783 // X % undef -> undef
4784 // X / 0 -> undef
4785 // X % 0 -> undef
4790 // undef / X -> 0
4791 // undef % X -> 0
4795 // 0 / X -> 0
4796 // 0 % X -> 0
4798 if (N0C && N0C->isZero())
4801 // X / X -> 1
4802 // X % X -> 0
4806 // X / 1 -> X
4807 // X % 1 -> 0
4808 // If this is a boolean op (single-bit element type), we can't have
4809 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4810 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4812 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4819 SDValue N0 = N->getOperand(0);
4820 SDValue N1 = N->getOperand(1);
4821 EVT VT = N->getValueType(0);
4825 // fold (sdiv c1, c2) -> c1/c2
4834 // fold (sdiv X, -1) -> 0-X
4836 if (N1C && N1C->isAllOnes())
4839 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4840 if (N1C && N1C->isMinSignedValue())
4852 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4858 // (Dividend - (Quotient * Divisor).
4859 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4870 // sdiv, srem -> sdivrem
4874 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4882 // Helper for determining whether a value is a power-of-2 constant scalar or a
4885 if (C->isZero() || C->isOpaque())
4887 if (C->getAPIntValue().isPowerOf2())
4889 if (C->getAPIntValue().isNegatedPowerOf2())
4899 EVT VT = N->getValueType(0);
4903 // fold (sdiv X, pow2) -> simple ops after legalize
4905 // better results in that case. The target-specific lowering should learn how
4907 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4908 // Target-specific implementation of sdiv x, pow2.
4923 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4926 // Add (N0 < 0) ? abs2 - 1 : 0;
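
// [Editorial illustration, not part of DAGCombiner.cpp] The sequence built
// here, specialized to a divisor of 8 (helper name invented): add abs2 - 1
// only when the dividend is negative, then shift right arithmetically.
// Assumes arithmetic right shift on signed types (guaranteed since C++20):
#include <cstdint>
constexpr int32_t sdivBy8(int32_t x) {
  int32_t bias = (x >> 31) & 7;  // (N0 < 0) ? abs2 - 1 : 0
  return (x + bias) >> 3;        // sra by log2(abs2)
}
static_assert(sdivBy8(20) == 20 / 8 && sdivBy8(-20) == -20 / 8, "rounds toward zero");
static_assert(sdivBy8(-1) == 0, "small negative dividends round up to 0");
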
4934 // Special case: (sdiv X, 1) -> X
4935 // Special Case: (sdiv X, -1) -> 0-X
4948 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4956 // trade-offs.
4959 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4967 SDValue N0 = N->getOperand(0);
4968 SDValue N1 = N->getOperand(1);
4969 EVT VT = N->getValueType(0);
4973 // fold (udiv c1, c2) -> c1/c2
4982 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4984 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4998 // (Dividend - (Quotient * Divisor).
4999 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5010 // sdiv, srem -> sdivrem
5014 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5018 // Simplify the operands using demanded-bits information.
5029 EVT VT = N->getValueType(0);
5031 // fold (udiv x, (1 << c)) -> x >>u c
5043 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5060 // fold (udiv x, c) -> alternate
5063 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5071 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5072 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5073 // Target-specific implementation of srem x, pow2.
5082 unsigned Opcode = N->getOpcode();
5083 SDValue N0 = N->getOperand(0);
5084 SDValue N1 = N->getOperand(1);
5085 EVT VT = N->getValueType(0);
5091 // fold (rem c1, c2) -> c1%c2
5095 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5112 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5117 // fold (urem x, pow2) -> (and x, pow2-1)
5123 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5124 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
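
// [Editorial illustration, not part of DAGCombiner.cpp] For a power-of-two
// divisor the remainder is just the low bits, so urem turns into a mask (the
// shl/lshr variants above mask with the shifted power of two minus one):
static_assert(1234u % 16u == (1234u & 15u), "urem x, pow2 -> and x, pow2-1");
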
5138 // If X/C can be simplified by the division-by-constant logic, lower
5139 // X%C to the equivalent of X-X/C*C.
5140 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5157 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5168 // sdiv, srem -> sdivrem
5176 SDValue N0 = N->getOperand(0);
5177 SDValue N1 = N->getOperand(1);
5178 EVT VT = N->getValueType(0);
5188 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5194 // fold (mulhs x, 0) -> 0
5200 // fold (mulhs x, 0) -> 0
5204 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5208 DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5210 // fold (mulhs x, undef) -> 0
5235 SDValue N0 = N->getOperand(0);
5236 SDValue N1 = N->getOperand(1);
5237 EVT VT = N->getValueType(0);
5247 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5253 // fold (mulhu x, 0) -> 0
5259 // fold (mulhu x, 0) -> 0
5263 // fold (mulhu x, 1) -> 0
5267 // fold (mulhu x, undef) -> 0
5271 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5301 // Simplify the operands using demanded-bits information.
5311 unsigned Opcode = N->getOpcode();
5312 SDValue N0 = N->getOperand(0);
5313 SDValue N1 = N->getOperand(1);
5314 EVT VT = N->getValueType(0);
5325 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5331 // fold (avg x, undef) -> x
5337 // fold (avg x, x) --> x
5341 // fold (avgfloor x, 0) -> x >> 1
5350 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5351 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5367 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5368 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
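
// [Editorial illustration, not part of DAGCombiner.cpp] The rewrite is the
// identity ceil((x + (y - 1)) / 2) == floor((x + y) / 2) for y >= 1, checked
// here with invented helpers over plain unsigned arithmetic:
constexpr unsigned avgflooruSketch(unsigned x, unsigned y) { return (x + y) / 2; }
constexpr unsigned avgceiluSketch(unsigned x, unsigned y) { return (x + y + 1) / 2; }
static_assert(avgflooruSketch(10, 7) == avgceiluSketch(10, 7 - 1), "odd sum");
static_assert(avgflooruSketch(10, 8) == avgceiluSketch(10, 8 - 1), "even sum");
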
5382 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5383 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5395 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5398 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5407 unsigned Opcode = N->getOpcode();
5408 SDValue N0 = N->getOperand(0);
5409 SDValue N1 = N->getOperand(1);
5410 EVT VT = N->getValueType(0);
5420 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5426 // fold (abd x, undef) -> 0
5430 // fold (abd x, x) -> 0
5436 // fold (abds x, 0) -> abs x
5441 // fold (abdu x, 0) -> x
5445 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5459 bool HiExists = N->hasAnyUseOfValue(1);
5461 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5462 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5467 bool LoExists = N->hasAnyUseOfValue(0);
5469 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5470 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5480 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5490 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5506 SDValue N0 = N->getOperand(0);
5507 SDValue N1 = N->getOperand(1);
5508 EVT VT = N->getValueType(0);
5513 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5518 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5547 SDValue N0 = N->getOperand(0);
5548 SDValue N1 = N->getOperand(1);
5549 EVT VT = N->getValueType(0);
5554 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5559 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5561 // (umul_lohi N0, 0) -> (0, 0)
5567 // (umul_lohi N0, 1) -> (N0, 0)
5597 SDValue N0 = N->getOperand(0);
5598 SDValue N1 = N->getOperand(1);
5600 bool IsSigned = (ISD::SMULO == N->getOpcode());
5602 EVT CarryVT = N->getValueType(1);
5614 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5615 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5623 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5625 // fold (mulo x, 0) -> 0 + no carry out
5630 // (mulo x, 2) -> (addo x, x)
5632 if (N1C && N1C->getAPIntValue() == 2 &&
5635 N->getVTList(), N0, N0);
5654 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5672 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5673 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5692 const fltSemantics &Semantics = InputTy->getFltSemantics();
5718 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5728 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5740 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5743 const APInt &MinC = MinCOp->getAPIntValue();
5744 const APInt &MaxC = MaxCOp->getAPIntValue();
5746 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5780 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5786 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5797 const APInt &C1 = N1C->getAPIntValue();
5798 const APInt &C3 = N3C->getAPIntValue();
5820 SDValue N0 = N->getOperand(0);
5821 SDValue N1 = N->getOperand(1);
5823 unsigned Opcode = N->getOpcode();
5830 // If the operands are the same, this is a no-op.
5845 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5876 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5895 // Simplify the operands using demanded-bits information.
5905 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5907 unsigned LogicOpcode = N->getOpcode();
5919 // Handle size-changing casts (or sign_extend_inreg).
5945 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5952 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5975 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5986 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5996 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5997 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6010 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6029 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6032 // The type-legalizer generates this pattern when loading illegal
6050 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6051 !SVN0->getMask().equals(SVN1->getMask()))
6060 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6064 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6073 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6077 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6098 // If we're here post-legalization or the logic op type is not i1, the logic
6109 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6110 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6125 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6126 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6127 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6128 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6144 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6145 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6146 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6147 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6156 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
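
// [Editorial illustration, not part of DAGCombiner.cpp] The add-then-unsigned
// compare trick above: X + 1 lands in {0, 1} exactly when X is in {-1, 0}, so
// a single setuge replaces both setnes:
#include <cstdint>
constexpr bool twoSetnes(int8_t x) { return x != 0 && x != -1; }
constexpr bool oneSetuge(int8_t x) { return uint8_t(x + 1) >= 2; }
static_assert(twoSetnes(-2) == oneSetuge(-2) && twoSetnes(-1) == oneSetuge(-1) &&
              twoSetnes(0) == oneSetuge(0) && twoSetnes(1) == oneSetuge(1), "");
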
6172 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6173 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
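
// [Editorial illustration, not part of DAGCombiner.cpp] xor is zero exactly on
// equal inputs, so or-ing two xors tests both equalities with one compare
// against zero:
static_assert((((3 ^ 3) | (7 ^ 7)) == 0) == (3 == 3 && 7 == 7), "both pairs equal");
static_assert((((3 ^ 4) | (7 ^ 7)) == 0) == (3 == 4 && 7 == 7), "one pair differs");
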
6184 // Match a shared variable operand and 2 non-opaque constant operands.
6188 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6190 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6191 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6194 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6195 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6214 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6215 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6239 // FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6299 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6303 SDValue LHS = LogicOp->getOperand(0);
6304 SDValue RHS = LogicOp->getOperand(1);
6305 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6306 !LHS->hasOneUse() || !RHS->hasOneUse())
6313 SDValue LHS0 = LHS->getOperand(0);
6314 SDValue RHS0 = RHS->getOperand(0);
6315 SDValue LHS1 = LHS->getOperand(1);
6316 SDValue RHS1 = RHS->getOperand(1);
6321 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6322 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6323 EVT VT = LogicOp->getValueType(0);
6328 // compare against the same value. Replace the and/or-cmp-cmp sequence with a
6329 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6330 // sequence is replaced with a min-cmp sequence:
6331 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6332 // and the and-cmp-cmp sequence is replaced with a max-cmp sequence:
6333 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
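
// [Editorial illustration, not part of DAGCombiner.cpp] The two rewrites
// above, checked on scalars: an or of compares against a shared bound is one
// compare of the min, and an and is one compare of the max:
#include <algorithm>
static_assert(((3 < 5) || (7 < 5)) == (std::min(3, 7) < 5), "or-cmp-cmp -> min-cmp");
static_assert(((3 < 5) && (7 < 5)) == (std::max(3, 7) < 5), "and-cmp-cmp -> max-cmp");
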
6394 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6401 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6416 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6418 const APInt &APLhs = LHS1C->getAPIntValue();
6419 const APInt &APRhs = RHS1C->getAPIntValue();
6423 if (APLhs == (-APRhs) &&
6427 // (icmp eq A, C) | (icmp eq A, -C)
6428 // -> (icmp eq Abs(A), C)
6429 // (icmp ne A, C) & (icmp ne A, -C)
6430 // -> (icmp ne Abs(A), C)
6439 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6440 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6442 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6443 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6447 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6448 // -> ~A & smin(C0, C1) == 0
6450 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6451 // -> ~A & smin(C0, C1) != 0
6455 APInt Dif = MaxC - MinC;
6467 DAG.getConstant(-MinC, DL, OpVT));
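
// [Editorial illustration, not part of DAGCombiner.cpp] With C0 == 12 and
// C1 == 16 (difference 4, a power of two), A matches one of the two constants
// iff A - C0 has no bits outside that single difference bit:
constexpr bool viaTwoSetcc(unsigned a) { return a == 12 || a == 16; }
constexpr bool viaSubAndMask(unsigned a) { return ((a - 12) & ~4u) == 0; }
static_assert(viaTwoSetcc(12) == viaSubAndMask(12) && viaTwoSetcc(16) == viaSubAndMask(16) &&
              viaTwoSetcc(13) == viaSubAndMask(13) && viaTwoSetcc(20) == viaSubAndMask(20), "");
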
6480 // Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6515 // fold (and x, undef) -> 0
6523 // and(x, add) -> and(add, x)
6529 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6536 APInt ADDC = ADDI->getAPIntValue();
6537 APInt SRLC = SRLI->getAPIntValue();
6564 if (!AndC->getAPIntValue().isMask())
6567 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6570 EVT LoadedVT = LoadN->getMemoryVT();
6581 if (!LoadN->isSimple())
6584 // Do not generate loads of non-round integer types since these can
6608 // Do not generate loads of non-round integer types since these can
6614 if (!LDST->isSimple())
6617 EVT LdStMemVT = LDST->getMemoryVT();
6632 const Align LDSTAlign = LDST->getAlign();
6635 LDST->getAddressSpace(), NarrowAlign,
6636 LDST->getMemOperand()->getFlags()))
6641 EVT PtrType = LDST->getBasePtr().getValueType();
6653 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6657 // (the value loaded and the chain). Don't transform a pre-increment
6661 if (Load->getNumValues() > 2)
6667 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6668 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6677 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6681 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6694 for (SDValue Op : N->op_values()) {
6700 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6701 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6713 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6717 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6718 ExtVT.bitsGE(Load->getMemoryVT()))
6722 if (ExtVT.bitsLE(Load->getMemoryVT()))
6731 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6734 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6758 if (NodeToMask->getNumValues() > 1) {
6760 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6777 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6781 if (!Mask->getAPIntValue().isMask())
6785 if (isa<LoadSDNode>(N->getOperand(0)))
6795 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6796 SDValue MaskOp = N->getOperand(1);
6801 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6803 FixupNode->getValueType(0),
6812 SDValue Op0 = LogicN->getOperand(0);
6813 SDValue Op1 = LogicN->getOperand(1);
6831 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6832 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6843 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6850 // x & (-1 'logical shift' y)
6855 assert(N->getOpcode() == ISD::AND);
6857 SDValue N0 = N->getOperand(0);
6858 SDValue N1 = N->getOperand(1);
6864 // Try to match (-1 '[outer] logical shift' y)
6868 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6871 OuterShift = M->getOpcode();
6878 if (!isAllOnesConstant(M->getOperand(0)))
6880 Y = M->getOperand(1);
6893 EVT VT = N->getValueType(0);
6907 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6910 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6932 // Match a shift-right by constant.
6946 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6963 // Turn this into a bit-test pattern using mask op + setcc:
6964 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6965 // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
6971 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6975 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
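
// [Editorial illustration, not part of DAGCombiner.cpp] Testing an inverted
// bit by shifting and masking is the same as masking the original bit and
// comparing with zero, which is the bit-test form the combine produces:
constexpr bool viaShiftedNot(unsigned x, unsigned c) { return ((~x >> c) & 1u) != 0; }
constexpr bool viaBitTest(unsigned x, unsigned c) { return (x & (1u << c)) == 0; }
static_assert(viaShiftedNot(0b1010u, 1) == viaBitTest(0b1010u, 1), "bit set");
static_assert(viaShiftedNot(0b1010u, 2) == viaBitTest(0b1010u, 2), "bit clear");
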
6978 /// For targets that support usubsat, match a bit-hack form of that operation
6981 EVT VT = N->getValueType(0);
6985 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6986 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6991 m_SpecificInt(BitWidth - 1))))) &&
6994 m_SpecificInt(BitWidth - 1))))))
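
// [Editorial illustration, not part of DAGCombiner.cpp] For i8, (x s>> 7) is
// all-ones exactly when x >= 128 unsigned, and x ^ 128 adds or subtracts 128
// mod 256, so the masked value equals usubsat(x, 128). Helper names invented;
// assumes two's-complement conversions and arithmetic right shift (guaranteed
// since C++20):
#include <cstdint>
constexpr uint8_t bitHack(uint8_t x) {
  return uint8_t(x ^ 0x80) & uint8_t(int8_t(x) >> 7);  // mask is 0x00 or 0xFF
}
constexpr uint8_t usubsat128(uint8_t x) { return x >= 128 ? x - 128 : 0; }
static_assert(bitHack(200) == usubsat128(200) && bitHack(50) == usubsat128(50), "");
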
7004 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7007 unsigned LogicOpcode = N->getOpcode();
7023 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7024 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7040 EVT VT = N->getValueType(0);
7051 /// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7052 /// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7055 unsigned LogicOpcode = N->getOpcode();
7079 EVT VT = N->getValueType(0);
7085 SDValue N0 = N->getOperand(0);
7086 SDValue N1 = N->getOperand(1);
7090 // x & x --> x
7094 // fold (and c1, c2) -> c1&c2
7111 // fold (and x, 0) -> 0, vector edition
7117 // fold (and x, -1) -> x, vector edition
7124 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7125 EVT LoadVT = MLoad->getMemoryVT();
7133 if (Splat->getAPIntValue().isMask(ElementSize)) {
7135 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7136 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7137 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7138 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7149 // fold (and x, -1) -> x
7166 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7169 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7174 // fold (and (or x, C), D) -> D if (C & D) == D
7176 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7186 APInt Mask = ~N1C->getAPIntValue();
7189 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7193 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7194 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7203 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7209 N0->hasOneUse() && N0Op0->hasOneUse()) {
7222 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7242 Constant = C->getAPIntValue();
7244 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7255 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7279 Load->getValueType(0),
7280 Load->getMemoryVT());
7285 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7288 switch (Load->getExtensionType()) {
7303 if (Load->getExtensionType() == ISD::EXTLOAD) {
7304 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7305 Load->getValueType(0), SDLoc(Load),
7306 Load->getChain(), Load->getBasePtr(),
7307 Load->getOffset(), Load->getMemoryVT(),
7308 Load->getMemOperand());
7310 if (Load->getNumValues() == 3) {
7335 EVT ExtVT = Ext->getValueType(0);
7336 SDValue Extendee = Ext->getOperand(0);
7339 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7351 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7353 EVT MemVT = GN0->getMemoryVT();
7359 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7360 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7363 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7364 GN0->getIndexType(), ISD::ZEXTLOAD);
7373 // fold (and (load x), 255) -> (zextload x, i8)
7374 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7391 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7401 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7402 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7408 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7412 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7417 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7421 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7427 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7429 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7431 // Masking the negated extension of a boolean is just the zero-extended
7433 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7434 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7436 // Note: the SimplifyDemandedBits fold below can make an information-losing
7447 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7448 // fold (and (sra)) -> (and (srl)) when possible.
7452 // fold (zext_inreg (extload x)) -> (zextload x)
7453 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7458 EVT MemVT = LN0->getMemoryVT();
7463 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7465 ((!LegalOperations && LN0->isSimple()) ||
7468 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7469 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7476 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7477 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7496 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7503 if (!C->getAPIntValue().isMask(
7533 EVT VT = N->getValueType(0);
7547 if (!N0->hasOneUse())
7552 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7553 N01C->getZExtValue() != 0xFFFF))
7560 if (!N1->hasOneUse())
7563 if (!N11C || N11C->getZExtValue() != 0xFF)
7573 if (!N0->hasOneUse() || !N1->hasOneUse())
7580 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7584 SDValue N00 = N0->getOperand(0);
7586 if (!N00->hasOneUse())
7589 if (!N001C || N001C->getZExtValue() != 0xFF)
7595 SDValue N10 = N1->getOperand(0);
7597 if (!N10->hasOneUse())
7602 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7603 N101C->getZExtValue() != 0xFFFF))
7616 // If the left-shift isn't masked out then the only way this is a bswap is
7638 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7643 /// Return true if the specified node is an element that makes up a 32-bit
7650 if (!N->hasOneUse())
7672 switch (N1C->getZExtValue()) {
7697 if (!C || C->getZExtValue() != 8)
7705 if (!C || C->getZExtValue() != 8)
7714 if (!C || C->getZExtValue() != 8)
7722 if (!C || C->getZExtValue() != 8)
7741 if (!C || C->getAPIntValue() != 16)
7757 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7764 if (!N0->hasOneUse() || !N1->hasOneUse())
7770 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7771 Mask1->getAPIntValue() != 0x00ff00ff)
7781 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7792 /// Match a 32-bit packed halfword bswap. That is
7802 EVT VT = N->getValueType(0);
7864 // fold (or x, undef) -> -1
7871 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7874 (N0->hasOneUse() || N1->hasOneUse())) {
7883 const APInt &LHSMask = N0O1C->getAPIntValue();
7884 const APInt &RHSMask = N1O1C->getAPIntValue();
7897 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7902 (N0->hasOneUse() || N1->hasOneUse())) {
7919 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7920 return V->getOperand(0);
7930 // fold or (and x, y), x --> x
7934 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7943 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7954 // fold or (xor X, N1), N1 --> or X, N1
7958 // fold or (xor x, y), (x and/or y) --> or x, y
7968 if (V->getOpcode() == ISD::ZERO_EXTEND)
7969 return V->getOperand(0);
7973 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7979 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7985 // Attempt to match a legalized build_pair-esque pattern:
7993 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8009 SDValue N0 = N->getOperand(0);
8010 SDValue N1 = N->getOperand(1);
8014 // x | x --> x
8018 // fold (or c1, c2) -> c1|c2
8032 // fold (or x, 0) -> x, vector edition
8036 // fold (or x, -1) -> -1, vector edition
8041 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8056 SmallVector<int, 4> Mask(NumElts, -1);
8059 int M0 = SV0->getMaskElt(i);
8060 int M1 = SV1->getMaskElt(i);
8079 // We have a zero and non-zero element. If the non-zero came from
8098 // fold (or x, 0) -> x
8102 // fold (or x, -1) -> -1
8109 // fold (or x, c) -> c iff (x & ~c) == 0
8111 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8130 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8133 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8138 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8141 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8143 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8158 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8170 // Simplify the operands using demanded-bits information.
8218 /// (or (add v v) (srl v bitwidth-1)):
8219 /// expands (add v v) -> (shl v 1)
8222 /// expands (mul v c0) -> (shl (mul v c1) c3)
8225 /// expands (udiv v c0) -> (srl (udiv v c1) c3)
8228 /// expands (shl v c0) -> (shl (shl v c1) c3)
8231 /// expands (srl v c0) -> (srl (srl v c1) c3)
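// --- Illustrative sketch (not part of this file): how the expansions listed
// above let a shift hide inside another op. For the (mul v c0) form, with
// c0 = c1 << c3 (here 24 = 3 << 3):
#include <cstdint>
static uint32_t hidden(uint32_t V)  { return V * 24u; }        // (mul v 24)
static uint32_t exposed(uint32_t V) { return (V * 3u) << 3; }  // (shl (mul v 3) 3)
// hidden(V) == exposed(V) for all V, so a shl by 3 can be peeled off the
// multiply when looking for the shifted half of a rotate pattern.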
8250 // (add v v) -> (shl v 1)
8256 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8293 // TODO: We should be able to handle non-uniform constant vectors for these values
8295 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8296 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8297 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8302 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8304 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8306 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8307 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8315 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8316 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8327 // c2 - (bitwidth(op0 v c0) - c1) == c0
8328 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8341 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8357 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8358 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8360 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8363 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8365 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8370 // Neg == EltSize - Pos [B]
8383 // always invokes undefined behavior for 32-bit X.
8385 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8387 // un-demanded bits.
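// --- Illustrative sketch (not part of this file): identity [A] for a
// power-of-2 element size, here EltSize = 32.
#include <cstdint>
static uint32_t rotlViaMaskedSub(uint32_t X, uint32_t Pos) {
  // Neg = (EltSize - Pos) & (EltSize - 1) stays correct even for Pos == 0,
  // where a plain shift by (32 - Pos) == 32 would be out of range.
  uint32_t Neg = (32u - Pos) & 31u;
  return (X << (Pos & 31u)) | (X >> Neg);
}
// For Pos == 0 both shifts are by 0 and the OR reproduces X; otherwise this
// is the usual (x << Pos) | (x >> (32 - Pos)) rotate.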
8429 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8442 Width = NegC->getAPIntValue();
8447 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8451 // NegC & Mask == (EltSize - PosC) & Mask
8455 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8463 // EltSize & Mask is 0 since Mask is EltSize - 1.
8469 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8479 // (srl x, (*ext (sub 32, y)))) ->
8483 // (srl x, (*ext y))) ->
8496 // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8510 // (srl x1, (*ext (sub 32, y)))) ->
8514 // (srl x1, (*ext y))) ->
8529 return Cst && (Cst->getAPIntValue() == Imm);
8533 // -> (fshl x0, x1, y)
8535 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8542 // -> (fshr x0, x1, y)
8544 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8551 // -> (fshr x0, x1, y)
8552 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8554 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8564 // MatchRotate - Handle an 'or' of two operands. If this is one of the many idioms for rotate, and if the target supports rotation instructions, generate a rot[lr].
8572 // We still try to match rotate by constant pre-legalization.
8573 // TODO: Support pre-legalization funnel-shift by constant.
8580 // lowering for rotate, allow matching rotate by non-constants. Only allow
8648 // Something has gone wrong - we've lost the shl/srl pair - bail.
8660 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8686 // TODO: Support pre-legalization funnel-shift by constant.
8712 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8717 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8731 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8732 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8733 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8734 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8751 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8761 // If the shift amount is sign/zext/any-extended just peel it off.
8814 /// However, there is a special case when dealing with vector loads -- we allow
8849 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8853 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8854 if (Depth == 10)
8859 if (Depth && !Op.hasOneUse() &&
8878 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8882 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8886 if (LHS->isConstantZero())
8888 if (RHS->isConstantZero())
8893 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8897 uint64_t BitShift = ShiftOp->getZExtValue();
8908 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8909 Depth + 1, VectorIndex, Index);
8914 SDValue NarrowOp = Op->getOperand(0);
8925 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8929 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8930 Depth + 1, VectorIndex, StartingIndex);
8932 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8936 VectorIndex = OffsetOp->getZExtValue();
8938 SDValue NarrowOp = Op->getOperand(0);
8959 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8964 if (!L->isSimple() || L->isIndexed())
8967 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8976 return L->getExtensionType() == ISD::ZEXTLOAD
8994 return BW - i - 1;
9009 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9064 // We only handle merging simple stores of 1-4 bytes.
9066 EVT MemVT = N->getMemoryVT();
9068 !N->isSimple() || N->isIndexed())
9072 SDValue Chain = N->getChain();
9085 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9086 Store->isIndexed() || !Store->hasOneUse())
9089 Chain = Store->getChain();
9115 SDValue Trunc = Store->getValue();
9135 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9165 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9187 *FirstStore->getMemOperand(), &Fast);
9199 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9210 // Special-case: check if byte offsets line up for the opposite endian.
9238 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9239 FirstStore->getPointerInfo(), FirstStore->getAlign());
9277 assert(N->getOpcode() == ISD::OR &&
9281 EVT VT = N->getValueType(0);
9291 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9311 for (int i = ByteWidth - 1; i >= 0; --i) {
9318 if (P->isConstantZero()) {
9320 // zero-extend the load.
9321 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9325 assert(P->hasSrc() && "provenance should either be memory or zero");
9326 auto *L = cast<LoadSDNode>(P->Src.value());
9329 SDValue LChain = L->getChain();
9346 if (L->getMemoryVT().isVector()) {
9347 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9350 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9357 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9381 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9407 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9410 // replace it with a single (possibly zero-extended) load and bswap + shift if
9419 // We do not introduce illegal bswaps when zero-extending as this tends to
9435 *FirstLoad->getMemOperand(), &Fast);
9441 Chain, FirstLoad->getBasePtr(),
9442 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
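// --- Illustrative sketch (not part of this file, and simplified): the shape
// of the offset check this load combine performs. Byte i of the OR must come
// from load offset i (the BW - i - 1 computation above handles the reversed
// numbering); an identity permutation needs no bswap, the reversed one does,
// and anything else bails.
#include <cstdint>
#include <optional>
#include <vector>
enum class NeedsBswap { No, Yes };
static std::optional<NeedsBswap> classify(const std::vector<int64_t> &Offsets) {
  const size_t BW = Offsets.size();
  bool Fwd = true, Rev = true;
  for (size_t i = 0; i < BW; ++i) {
    Fwd &= Offsets[i] == (int64_t)i;
    Rev &= Offsets[i] == (int64_t)(BW - i - 1);
  }
  if (Fwd) return NeedsBswap::No;
  if (Rev) return NeedsBswap::Yes;
  return std::nullopt; // not one contiguous load; the real code also weighs
                       // target endianness and allows zero-extended high bytes
}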
9459 // If the target has andn, bsl, or a similar bit-select instruction,
9476 assert(N->getOpcode() == ISD::XOR);
9478 // Don't touch 'not' (i.e. where y = -1).
9479 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9482 EVT VT = N->getValueType(0);
9495 // Don't touch 'not' (i.e. where y = -1).
9508 SDValue N0 = N->getOperand(0);
9509 SDValue N1 = N->getOperand(1);
9559 SDValue N0 = N->getOperand(0);
9560 SDValue N1 = N->getOperand(1);
9564 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9568 // fold (xor x, undef) -> undef
9574 // fold (xor c1, c2) -> c1^c2
9588 // fold (xor x, 0) -> x, vector edition
9593 // fold (xor x, 0) -> x
9601 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9604 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9609 // fold (a^b) -> (a|b) iff a and b share no bits.
9614 // look for 'add-like' folds:
9621 // fold !(x cc y) -> (x !cc y)
9626 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9657 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9668 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9669 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9681 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9682 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9695 // fold (not (neg x)) -> (add X, -1)
9696 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9704 // fold (not (add X, -1)) -> (neg X)
9710 // fold (xor (and x, y), y) -> (and (not x), y)
9711 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9718 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9727 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9732 // fold (xor x, x) -> 0
9736 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9748 // - Try to see the operation as placing a single zero in a value of all ones.
9749 // - There exists no value for x which would allow the result to contain zero.
9750 // - Values of x larger than the bitwidth are undefined and do not require a consistent result.
9752 // - Pushing the zero left requires shifting one bits in from the right.
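// --- Illustrative sketch (not part of this file): the reasoning above as a
// checkable identity for i32.
#include <cstdint>
static uint32_t viaXor(uint32_t X)  { return (1u << X) ^ 0xffffffffu; }        // (xor (shl 1, x), -1)
static uint32_t viaRotl(uint32_t X) { return (~1u << X) | (~1u >> (32 - X)); } // (rotl ~1, x)
// For X in [1, 31] both produce a value with a single zero bit at position X.
// X == 0 also satisfies the DAG-level identity, but the C++ expression above
// would shift by 32, so a masked rotate is needed to demonstrate that case.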
9760 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9776 // Simplify the expression using non-local knowledge.
9786 /// If we have a shift-by-constant of a bitwise logic op that itself has a
9787 /// shift-by-constant operand with identical opcode, we may be able to convert
9791 // Match a one-use bitwise logic op.
9792 SDValue LogicOp = Shift->getOperand(0);
9801 // Find a matching one-use shift by constant.
9802 unsigned ShiftOpcode = Shift->getOpcode();
9803 SDValue C1 = Shift->getOperand(1);
9806 const APInt &C1Val = C1Node->getAPIntValue();
9818 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9822 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9849 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9851 EVT VT = Shift->getValueType(0);
9852 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9857 LogicOp->getFlags());
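// --- Illustrative sketch (not part of this file): the distribution this fold
// relies on, for the shl/and case with C0 = 3 and C1 = 2.
#include <cstdint>
static uint32_t before(uint32_t X, uint32_t Y) { return ((X << 3) & Y) << 2; }
static uint32_t after (uint32_t X, uint32_t Y) { return (X << 5) & (Y << 2); }
// before(X, Y) == after(X, Y) for all X, Y: a constant shift distributes over
// the bitwise logic op, letting the two shifts of X merge into one.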
9867 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9870 if (isBitwiseNot(N->getOperand(0)))
9873 // The inner binop must be one-use, since we want to replace it.
9874 SDValue LHS = N->getOperand(0);
9878 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9894 if (N->getOpcode() != ISD::SHL)
9913 if (IsCopyOrSelect && N->hasOneUse())
9918 EVT VT = N->getValueType(0);
9920 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9921 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9922 N->getOperand(1));
9930 assert(N->getOpcode() == ISD::TRUNCATE);
9931 assert(N->getOperand(0).getOpcode() == ISD::AND);
9933 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9934 EVT TruncVT = N->getValueType(0);
9935 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9937 SDValue N01 = N->getOperand(0).getOperand(1);
9940 SDValue N00 = N->getOperand(0).getOperand(0);
9954 SDValue N0 = N->getOperand(0);
9955 SDValue N1 = N->getOperand(1);
9956 EVT VT = N->getValueType(0);
9959 // fold (rot x, 0) -> x
9963 // fold (rot x, c) -> x iff (c % BitSize) == 0
9965 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9970 // fold (rot x, c) -> (rot x, c % BitSize)
9973 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9981 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9984 // rot i16 X, 8 --> bswap X
9986 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9990 // Simplify the operands using demanded-bits information.
9994 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9998 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10004 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
10010 bool SameSide = (N->getOpcode() == NextOp);
10024 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10033 SDValue N0 = N->getOperand(0);
10034 SDValue N1 = N->getOperand(1);
10043 // fold (shl c1, c2) -> c1<<c2
10053 // If setcc produces all-one true value then:
10054 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10055 if (N1CV && N1CV->isConstant()) {
10057 SDValue N00 = N0->getOperand(0);
10058 SDValue N01 = N0->getOperand(1);
10061 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10079 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10086 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10090 APInt c1 = LHS->getAPIntValue();
10091 APInt c2 = RHS->getAPIntValue();
10100 APInt c1 = LHS->getAPIntValue();
10101 APInt c2 = RHS->getAPIntValue();
10111 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10127 APInt c1 = LHS->getAPIntValue();
10128 APInt c2 = RHS->getAPIntValue();
10130 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10140 APInt c1 = LHS->getAPIntValue();
10141 APInt c2 = RHS->getAPIntValue();
10143 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10156 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10165 APInt c1 = LHS->getAPIntValue();
10166 APInt c2 = RHS->getAPIntValue();
10184 const APInt &LHSC = LHS->getAPIntValue();
10185 const APInt &RHSC = RHS->getAPIntValue();
10190 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10191 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10192 if (N0->getFlags().hasExact()) {
10209 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
10211 // Only fold this if the inner shift has no other uses -- if it does,
10240 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10248 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10249 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10261 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10267 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10272 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10287 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10288 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10296 if (N1C && !N1C->isOpaque())
10300 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the target.
10321 const APInt &C1 = N1C->getAPIntValue();
10339 // Transform a right shift of a multiply into a multiply-high.
10341 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10342 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
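// --- Illustrative sketch (not part of this file): the scalar shape being
// matched, i.e. what mulhu looks like before the combine.
#include <cstdint>
static uint32_t mulhuPattern(uint32_t A, uint32_t B) {
  // (srl (mul (zext $a), (zext $b)), 32): the high half of the widened product.
  return (uint32_t)(((uint64_t)A * (uint64_t)B) >> 32);
}
// When the target provides MULHU/MULHS, this becomes a single node and the
// 64-bit multiply is avoided.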
10345 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10350 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10355 SDValue ShiftOperand = N->getOperand(0);
10374 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10377 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10381 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10390 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10397 ? Constant->getAPIntValue().getSignificantBits()
10398 : Constant->getAPIntValue().getActiveBits();
10402 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10426 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10449 bool IsSigned = N->getOpcode() == ISD::SRA;
10453 // fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10456 unsigned Opcode = N->getOpcode();
10460 SDValue N0 = N->getOperand(0);
10461 EVT VT = N->getValueType(0);
10490 SDValue N0 = N->getOperand(0);
10491 SDValue N1 = N->getOperand(1);
10499 // fold (sra c1, c2) -> c1 >>s c2
10503 // Arithmetic shifting an all-sign-bit value is a no-op.
10504 // fold (sra 0, x) -> 0
10505 // fold (sra -1, x) -> -1
10519 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10527 APInt c1 = LHS->getAPIntValue();
10528 APInt c2 = RHS->getAPIntValue();
10532 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
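// --- Illustrative sketch (not part of this file): why the summed shift
// amount is clamped to OpSizeInBits - 1 rather than wrapped, assuming the
// usual arithmetic right shift for signed integers.
#include <cstdint>
static int32_t twoShifts(int32_t X) { return (X >> 20) >> 20; } // c1 + c2 = 40
static int32_t clamped(int32_t X)   { return X >> 31; }
// Once every bit is a copy of the sign bit, further arithmetic shifts change
// nothing, so twoShifts(X) == clamped(X) even though 40 >= 32.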
10552 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10553 // result_size - n != m.
10562 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10567 // Determine the residual right-shift amount.
10568 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10570 // If the shift is not a no-op (in which case this should be just a sign
10584 N->getValueType(0), Trunc);
10590 // sra (add (shl X, N1C), AddC), N1C -->
10591 // sext (add (trunc X to (width - N1C)), AddC')
10592 // sra (sub AddC, (shl X, N1C)), N1C -->
10593 // sext (sub AddC1',(trunc X to (width - N1C)))
10606 unsigned ShiftAmt = N1C->getZExtValue();
10607 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10612 // implementation and/or target-specific overrides (because
10613 // non-simple types likely require masking when legalized), but
10619 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10633 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10640 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10641 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10643 // TODO - support non-uniform vector shift amounts.
10652 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10653 if (LargeShift->getAPIntValue() == TruncBits) {
10673 if (N1C && !N1C->isOpaque())
10677 // Try to transform this shift into a multiply-high if
10682 // Attempt to convert a sra of a load into a narrower sign-extending load.
10693 SDValue N0 = N->getOperand(0);
10694 SDValue N1 = N->getOperand(1);
10703 // fold (srl c1, c2) -> c1 >>u c2
10721 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10725 APInt c1 = LHS->getAPIntValue();
10726 APInt c2 = RHS->getAPIntValue();
10735 APInt c1 = LHS->getAPIntValue();
10736 APInt c2 = RHS->getAPIntValue();
10749 // TODO - support non-uniform vector shift amounts.
10751 uint64_t c1 = N001C->getZExtValue();
10752 uint64_t c2 = N1C->getZExtValue();
10756 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10767 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10774 OpSizeInBits - c2),
10782 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
10785 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10789 const APInt &LHSC = LHS->getAPIntValue();
10790 const APInt &RHSC = RHS->getAPIntValue();
10817 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10818 // TODO - support non-uniform vector shift amounts.
10823 if (N1C->getAPIntValue().uge(BitSize))
10827 uint64_t ShiftAmt = N1C->getZExtValue();
10833 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10840 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10842 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10847 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power-of-2 bitwidth (the "5" is log2 of that bitwidth).
10851 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10882 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10894 if (N1C && !N1C->isOpaque())
10898 // Attempt to convert a srl of a load into a narrower zero-extending load.
10926 if (N->hasOneUse()) {
10927 SDNode *User = *N->user_begin();
10930 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
10931 User = *User->user_begin();
10933 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
10934 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
10938 // Try to transform this shift into a multiply-high if
10950 EVT VT = N->getValueType(0);
10951 SDValue N0 = N->getOperand(0);
10952 SDValue N1 = N->getOperand(1);
10953 SDValue N2 = N->getOperand(2);
10954 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10958 // fold (fshl N0, N1, 0) -> N0
10959 // fold (fshr N0, N1, 0) -> N1
10962 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10969 // TODO - support non-uniform vector shift amounts.
10973 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10974 if (Cst->getAPIntValue().uge(BitWidth)) {
10975 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10976 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10980 unsigned ShAmt = Cst->getZExtValue();
10984 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10985 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10986 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10987 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10991 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10995 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
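// --- Illustrative sketch (not part of this file): the zero-operand funnel
// shift folds above, for i32 and 0 < C < 32.
#include <cstdint>
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned C) {
  return (Hi << C) | (Lo >> (32 - C)); // generic fshl for in-range C
}
// fshl32(0, N1, C) == N1 >> (32 - C)  -> the lshr(N1, BW-C) fold
// fshl32(N0, 0, C) == N0 << C         -> the shl(N0, C) fold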
10997 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10998 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10999 // TODO - bigendian support once we have test coverage.
11000 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11001 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11006 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11007 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11008 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
11013 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11014 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11017 RHS->getAddressSpace(), NewAlign,
11018 RHS->getMemOperand()->getFlags(), &Fast) &&
11021 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11024 VT, DL, RHS->getChain(), NewPtr,
11025 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11026 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11037 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11038 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11042 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11049 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11050 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11053 // non-constant (BW - N2).
11066 SDValue N0 = N->getOperand(0);
11067 SDValue N1 = N->getOperand(1);
11074 // fold (*shlsat c1, c2) -> c1<<c2
11075 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11081 // fold (sshlsat x, c) -> (shl x, c)
11082 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11083 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11086 // fold (ushlsat x, c) -> (shl x, c)
11087 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11088 N1C->getAPIntValue().ule(
11101 EVT SrcVT = N->getValueType(0);
11103 if (N->getOpcode() == ISD::TRUNCATE)
11104 N = N->getOperand(0).getNode();
11106 if (N->getOpcode() != ISD::ABS)
11109 EVT VT = N->getValueType(0);
11110 SDValue AbsOp1 = N->getOperand(0);
11121 // Check if the operands of the sub are (zero|sign)-extended.
11126 // fold (abs (sub nsw x, y)) -> abds(x, y)
11128 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
11138 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11139 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11146 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11147 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11149 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11150 (VT1 == MaxVT || Op1->hasOneUse()) &&
11159 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11160 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11170 SDValue N0 = N->getOperand(0);
11171 EVT VT = N->getValueType(0);
11174 // fold (abs c1) -> c2
11177 // fold (abs (abs x)) -> (abs x)
11180 // fold (abs x) -> x iff not-negative
11187 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11190 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11205 SDValue N0 = N->getOperand(0);
11206 EVT VT = N->getValueType(0);
11209 // fold (bswap c1) -> c2
11212 // fold (bswap (bswap x)) -> x
11216 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11225 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11231 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11232 ShAmt->getZExtValue() >= (BW / 2) &&
11233 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11237 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11246 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11247 // inverse-shift-of-bswap:
11248 // bswap (X u<< C) --> (bswap X) u>> C
11249 // bswap (X u>> C) --> (bswap X) u<< C
11253 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11254 ShAmt->getZExtValue() % 8 == 0) {
11268 SDValue N0 = N->getOperand(0);
11269 EVT VT = N->getValueType(0);
11272 // fold (bitreverse c1) -> c2
11276 // fold (bitreverse (bitreverse x)) -> x
11282 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11287 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11296 SDValue N0 = N->getOperand(0);
11297 EVT VT = N->getValueType(0);
11300 // fold (ctlz c1) -> c2
11313 SDValue N0 = N->getOperand(0);
11314 EVT VT = N->getValueType(0);
11317 // fold (ctlz_zero_undef c1) -> c2
11325 SDValue N0 = N->getOperand(0);
11326 EVT VT = N->getValueType(0);
11329 // fold (cttz c1) -> c2
11342 SDValue N0 = N->getOperand(0);
11343 EVT VT = N->getValueType(0);
11346 // fold (cttz_zero_undef c1) -> c2
11354 SDValue N0 = N->getOperand(0);
11355 EVT VT = N->getValueType(0);
11359 // fold (ctpop c1) -> c2
11367 const APInt &Amt = AmtC->getAPIntValue();
11460 const unsigned Opcode = N->getOpcode();
11462 // Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
11467 auto VT = N->getValueType(0);
11497 if (IsUnsigned && !Add->getFlags().hasNoUnsignedWrap())
11500 if (!IsUnsigned && !Add->getFlags().hasNoSignedWrap())
11503 return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
11525 // select (setcc x, K), (fneg x), -K -> fneg(minnum(x, K))
11547 /// If a (v)select has a condition value that is a sign-bit test, try to smear
11548 /// the condition operand sign-bit across the value width and use it as a mask.
11551 SDValue Cond = N->getOperand(0);
11552 SDValue C1 = N->getOperand(1);
11553 SDValue C2 = N->getOperand(2);
11557 EVT VT = N->getValueType(0);
11562 // The inverted-condition + commuted-select variants of these patterns are
11566 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11569 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11570 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11575 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11576 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
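// --- Illustrative sketch (not part of this file): the two patterns above for
// i32, with the sign bit smeared across the width as a mask (the i8 variant
// uses 7 in place of 31).
#include <cstdint>
static int32_t gtMinusOne(int32_t X, int32_t C1) { return (X >> 31) | C1; } // X > -1 ? C1 : -1
static int32_t ltZero(int32_t X, int32_t C1)     { return (X >> 31) & C1; } // X < 0  ? C1 : 0
// X >>s 31 is 0 for non-negative X and all-ones otherwise, so the OR yields
// -1 exactly when X is negative and the AND yields C1 exactly then.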
11588 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11593 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11603 SDValue Cond = N->getOperand(0);
11604 SDValue N1 = N->getOperand(1);
11605 SDValue N2 = N->getOperand(2);
11606 EVT VT = N->getValueType(0);
11619 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11622 // have an integer-based boolean or a floating-point-based boolean unless we
11633 C1->isZero() && C2->isOne()) {
11644 // Only do this before legalization to avoid conflicting with target-specific
11646 // is also a target-independent combine here in DAGCombiner in the other
11647 // direction for (select Cond, -1, 0) when the condition is not i1.
11650 // select Cond, 1, 0 --> zext (Cond)
11651 if (C1->isOne() && C2->isZero())
11654 // select Cond, -1, 0 --> sext (Cond)
11655 if (C1->isAllOnes() && C2->isZero())
11658 // select Cond, 0, 1 --> zext (!Cond)
11659 if (C1->isZero() && C2->isOne()) {
11665 // select Cond, 0, -1 --> sext (!Cond)
11666 if (C1->isZero() && C2->isAllOnes()) {
11679 const APInt &C1Val = C1->getAPIntValue();
11680 const APInt &C2Val = C2->getAPIntValue();
11682 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11683 if (C1Val - 1 == C2Val) {
11688 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11694 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11702 // select Cond, -1, C --> or (sext Cond), C
11703 if (C1->isAllOnes()) {
11708 // select Cond, C, -1 --> or (sext (not Cond)), C
11709 if (C2->isAllOnes()) {
11724 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11725 N->getOpcode() == ISD::VP_SELECT) &&
11727 SDValue Cond = N->getOperand(0);
11728 SDValue T = N->getOperand(1), F = N->getOperand(2);
11729 EVT VT = N->getValueType(0);
11736 // select Cond, Cond, F --> or Cond, freeze(F)
11737 // select Cond, 1, F --> or Cond, freeze(F)
11741 // select Cond, T, Cond --> and Cond, freeze(T)
11742 // select Cond, T, 0 --> and Cond, freeze(T)
11746 // select Cond, T, 1 --> or (not Cond), freeze(T)
11753 // select Cond, 0, F --> and (not Cond), freeze(F)
11764 SDValue N0 = N->getOperand(0);
11765 SDValue N1 = N->getOperand(1);
11766 SDValue N2 = N->getOperand(2);
11767 EVT VT = N->getValueType(0);
11778 // compare is inverted from that pattern ("Cond0 s> -1").
11786 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
11789 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11794 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
11797 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11804 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
11808 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11816 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
11822 // (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
11823 // (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
11824 // (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
11825 // (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
11869 SDValue N0 = N->getOperand(0);
11870 SDValue N1 = N->getOperand(1);
11871 SDValue N2 = N->getOperand(2);
11872 EVT VT = N->getValueType(0);
11875 SDNodeFlags Flags = N->getFlags();
11883 // select (not Cond), N1, N2 -> select Cond, N2, N1
11886 SelectOp->setFlags(Flags);
11909 // -> select Cond0, (select Cond1, X, Y), Y
11910 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11911 SDValue Cond0 = N0->getOperand(0);
11912 SDValue Cond1 = N0->getOperand(1);
11922 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11923 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11924 SDValue Cond0 = N0->getOperand(0);
11925 SDValue Cond1 = N0->getOperand(1);
11936 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11937 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11938 SDValue N1_0 = N1->getOperand(0);
11939 SDValue N1_1 = N1->getOperand(1);
11940 SDValue N1_2 = N1->getOperand(2);
11955 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11956 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11957 SDValue N2_0 = N2->getOperand(0);
11958 SDValue N2_1 = N2->getOperand(1);
11959 SDValue N2_2 = N2->getOperand(2);
11974 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
11982 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
11996 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11998 // select (fcmp lt x, y), x, y -> fminnum x, y
11999 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12008 // This is conservatively limited to pre-legal-operations to give targets
12017 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12018 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12019 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12024 // %r = select %c, -1, %a
12029 // %r = select %u1, -1, %u0
12041 Flags = N0->getFlags();
12044 SelectNode->setFlags(Flags);
12069 SDValue Cond = N->getOperand(0);
12070 SDValue LHS = N->getOperand(1);
12071 SDValue RHS = N->getOperand(2);
12072 EVT VT = N->getValueType(0);
12080 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12087 // length of the BV and see if all the non-undef nodes are the same.
12090 if (Cond->getOperand(i)->isUndef())
12095 else if (Cond->getOperand(i).getNode() != BottomHalf)
12102 if (Cond->getOperand(i)->isUndef())
12107 else if (Cond->getOperand(i).getNode() != TopHalf)
12116 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12117 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12189 SDValue Mask = MSC->getMask();
12190 SDValue Chain = MSC->getChain();
12191 SDValue Index = MSC->getIndex();
12192 SDValue Scale = MSC->getScale();
12193 SDValue StoreVal = MSC->getValue();
12194 SDValue BasePtr = MSC->getBasePtr();
12195 SDValue VL = MSC->getVectorLength();
12196 ISD::MemIndexType IndexType = MSC->getIndexType();
12203 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12205 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12206 DL, Ops, MSC->getMemOperand(), IndexType);
12211 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12212 DL, Ops, MSC->getMemOperand(), IndexType);
12220 SDValue Mask = MSC->getMask();
12221 SDValue Chain = MSC->getChain();
12222 SDValue Index = MSC->getIndex();
12223 SDValue Scale = MSC->getScale();
12224 SDValue StoreVal = MSC->getValue();
12225 SDValue BasePtr = MSC->getBasePtr();
12226 ISD::MemIndexType IndexType = MSC->getIndexType();
12233 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12235 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12236 DL, Ops, MSC->getMemOperand(), IndexType,
12237 MSC->isTruncatingStore());
12242 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12243 DL, Ops, MSC->getMemOperand(), IndexType,
12244 MSC->isTruncatingStore());
12252 SDValue Mask = MST->getMask();
12253 SDValue Chain = MST->getChain();
12254 SDValue Value = MST->getValue();
12255 SDValue Ptr = MST->getBasePtr();
12264 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12265 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12266 !MST->getBasePtr().isUndef() &&
12267 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12268 MST1->getMemoryVT().getStoreSize()) ||
12270 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12271 MST->getMemoryVT().getStoreSize())) {
12272 CombineTo(MST1, MST1->getChain());
12273 if (N->getOpcode() != ISD::DELETED_NODE)
12281 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12282 !MST->isCompressingStore() && !MST->isTruncatingStore())
12283 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12284 MST->getBasePtr(), MST->getPointerInfo(),
12285 MST->getOriginalAlign(),
12286 MST->getMemOperand()->getFlags(), MST->getAAInfo());
12292 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12295 !cast<ConstantSDNode>(Value)->isOpaque())) {
12298 MST->getMemoryVT().getScalarSizeInBits());
12303 // Re-visit the store if anything changed and the store hasn't been merged
12305 // node back to the worklist if necessary, but we also need to re-visit
12307 if (N->getOpcode() != ISD::DELETED_NODE)
12317 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12318 MST->isUnindexed() && !MST->isCompressingStore() &&
12320 MST->getMemoryVT(), LegalOperations)) {
12321 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12324 MST->getOffset(), Mask, MST->getMemoryVT(),
12325 MST->getMemOperand(), MST->getAddressingMode(),
12334 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12335 // Combine strided stores with unit-stride to a regular VP store.
12336 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12337 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12338 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12339 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12340 SST->getVectorLength(), SST->getMemoryVT(),
12341 SST->getMemOperand(), SST->getAddressingMode(),
12342 SST->isTruncatingStore(), SST->isCompressingStore());
12349 SDValue Vec = N->getOperand(0);
12350 SDValue Mask = N->getOperand(1);
12351 SDValue Passthru = N->getOperand(2);
12398 SDValue Mask = MGT->getMask();
12399 SDValue Chain = MGT->getChain();
12400 SDValue Index = MGT->getIndex();
12401 SDValue Scale = MGT->getScale();
12402 SDValue BasePtr = MGT->getBasePtr();
12403 SDValue VL = MGT->getVectorLength();
12404 ISD::MemIndexType IndexType = MGT->getIndexType();
12407 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12410 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12411 Ops, MGT->getMemOperand(), IndexType);
12414 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12417 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12418 Ops, MGT->getMemOperand(), IndexType);
12426 SDValue Mask = MGT->getMask();
12427 SDValue Chain = MGT->getChain();
12428 SDValue Index = MGT->getIndex();
12429 SDValue Scale = MGT->getScale();
12430 SDValue PassThru = MGT->getPassThru();
12431 SDValue BasePtr = MGT->getBasePtr();
12432 ISD::MemIndexType IndexType = MGT->getIndexType();
12437 return CombineTo(N, PassThru, MGT->getChain());
12439 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12442 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12443 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12446 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12449 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12450 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12458 SDValue Mask = MLD->getMask();
12463 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12467 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12468 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12470 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12471 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12472 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12485 SDValue Chain = HG->getChain();
12486 SDValue Inc = HG->getInc();
12487 SDValue Mask = HG->getMask();
12488 SDValue BasePtr = HG->getBasePtr();
12489 SDValue Index = HG->getIndex();
12492 EVT MemVT = HG->getMemoryVT();
12493 MachineMemOperand *MMO = HG->getMemOperand();
12494 ISD::MemIndexType IndexType = HG->getIndexType();
12500 HG->getScale(), HG->getIntID()};
12501 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
12514 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12515 // Combine strided loads with unit-stride to a regular VP load.
12516 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12517 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12519 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12520 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12521 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12522 SLD->getMemOperand(), SLD->isExpandingLoad());
12531 SDValue Cond = N->getOperand(0);
12532 SDValue N1 = N->getOperand(1);
12533 SDValue N2 = N->getOperand(2);
12534 EVT VT = N->getValueType(0);
12558 const APInt &C1 = N1Elt->getAsAPIntVal();
12559 const APInt &C2 = N2Elt->getAsAPIntVal();
12562 if (C1 != C2 - 1)
12566 // Further simplifications for the extra-special cases where the constants are
12567 // all 0 or all -1 should be implemented as folds of these patterns.
12570 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12571 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12577 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12589 // The general case for select-of-constants:
12590 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12592 // leave that to a machine-specific pass.
12597 SDValue N0 = N->getOperand(0);
12598 SDValue N1 = N->getOperand(1);
12599 SDValue N2 = N->getOperand(2);
12612 SDValue N0 = N->getOperand(0);
12613 SDValue N1 = N->getOperand(1);
12614 SDValue N2 = N->getOperand(2);
12615 EVT VT = N->getValueType(0);
12624 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12628 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12629 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12640 // vselect (setg[te] X, 0), X, -X ->
12641 // vselect (setgt X, -1), X, -X ->
12642 // vselect (setl[te] X, 0), -X, X ->
12643 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12646 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12664 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
12671 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12672 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12678 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12691 // TODO: This could be extended to handle non-loading patterns, but that
12707 // vselect (ext (setcc load(X), C)), N1, N2 -->
12745 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12746 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12757 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12759 return Cond->getAPIntValue() == ~Op->getAPIntValue();
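// --- Illustrative sketch (not part of this file): the overflow idiom that
// becomes uaddsat, in scalar form.
#include <cstdint>
static uint32_t uaddsatPattern(uint32_t X, uint32_t Y) {
  uint32_t Sum = X + Y;        // wraps on overflow
  return X <= Sum ? Sum : ~0u; // x <= x+y ? x+y : ~0
}
// Unsigned addition overflows exactly when the wrapped sum compares smaller
// than an operand; the constant form x >= ~C ? x+C : ~0 works the same way.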
12781 // zext(x) >= y ? trunc(zext(x) - y) : 0
12782 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12783 // zext(x) > y ? trunc(zext(x) - y) : 0
12784 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12802 // x >= y ? x-y : 0 --> usubsat x, y
12803 // x > y ? x-y : 0 --> usubsat x, y
12814 // x > C-1 ? x+-C : 0 --> usubsat x, C
12818 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
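// --- Illustrative sketch (not part of this file): the usubsat idioms above.
#include <cstdint>
static uint32_t usubsatVar(uint32_t X, uint32_t Y) {
  return X >= Y ? X - Y : 0;    // x >= y ? x-y : 0
}
static uint32_t usubsatConst(uint32_t X) {
  const uint32_t C = 100;       // hypothetical constant for illustration
  return X > C - 1 ? X - C : 0; // x > C-1 ? x+-C : 0 (an add of -C)
}
// Both clamp the subtraction at zero instead of wrapping, which is exactly
// USUBSAT's semantics.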
12831 // x s< 0 ? x^C : 0 --> usubsat x, C
12852 // Fold (vselect all_ones, N1, N2) -> N1
12855 // Fold (vselect all_zeros, N1, N2) -> N2
12883 SDValue N0 = N->getOperand(0);
12884 SDValue N1 = N->getOperand(1);
12885 SDValue N2 = N->getOperand(2);
12886 SDValue N3 = N->getOperand(3);
12887 SDValue N4 = N->getOperand(4);
12888 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12891 // fold select_cc lhs, rhs, x, x, cc -> x
12895 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12905 // cond always true -> true val
12906 // cond always false -> false val
12908 return SCCC->isZero() ? N3 : N2;
12912 if (SCC->isUndef())
12920 SelectOp->setFlags(SCC->getFlags());
12938 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
12940 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12941 EVT VT = N->getValueType(0);
12942 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13008 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13013 return CNode->getAPIntValue();
13022 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13030 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13033 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13038 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13050 NumBits - ShiftCAmt->getZExtValue())
13052 NumBits - ShiftCAmt->getZExtValue());
13069 SDValue LHS = N->getOperand(0);
13070 SDValue RHS = N->getOperand(1);
13071 SDValue Carry = N->getOperand(2);
13072 SDValue Cond = N->getOperand(3);
13076 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13096 ISD::LoadExtType LoadExt = Load->getExtensionType();
13110 /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13111 /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13112 /// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13118 unsigned Opcode = N->getOpcode();
13119 SDValue N0 = N->getOperand(0);
13120 EVT VT = N->getValueType(0);
13125 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13129 SDValue Op1 = N0->getOperand(1);
13130 SDValue Op2 = N0->getOperand(2);
13144 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13145 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13146 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13152 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13164 unsigned Opcode = N->getOpcode();
13165 SDValue N0 = N->getOperand(0);
13166 EVT VT = N->getValueType(0);
13171 // fold (sext c1) -> c1
13172 // fold (zext c1) -> c1
13173 // fold (aext c1) -> c1
13177 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13178 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13179 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13180 if (N0->getOpcode() == ISD::SELECT) {
13181 SDValue Op1 = N0->getOperand(1);
13182 SDValue Op2 = N0->getOperand(2);
13188 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13190 // -->
13191 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13192 // -->
13197 return DAG.getSelect(DL, VT, N0->getOperand(0),
13203 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13204 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13205 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
13213 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13230 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13240 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
13241 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13250 for (SDUse &Use : N0->uses()) {
13257 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13258 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13264 SDValue UseOp = User->getOperand(i);
13279 // Remember if this value is live-out.
13280 if (User->getOpcode() == ISD::CopyToReg)
13286 for (SDUse &Use : N->uses()) {
13287 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13309 SDValue SOp = SetCC->getOperand(j);
13313 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13316 Ops.push_back(SetCC->getOperand(2));
13317 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13323 SDValue N0 = N->getOperand(0);
13324 EVT DstVT = N->getValueType(0);
13327 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13328 N->getOpcode() == ISD::ZERO_EXTEND) &&
13345 // All legal types, and illegal non-vector types, are handled elsewhere.
13348 if (N0->getOpcode() != ISD::LOAD)
13354 !N0.hasOneUse() || !LN0->isSimple() ||
13360 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13364 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13387 SDValue BasePtr = LN0->getBasePtr();
13392 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13393 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13394 SplitSrcVT, LN0->getOriginalAlign(),
13395 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13415 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13420 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13423 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13424 EVT VT = N->getValueType(0);
13425 EVT OrigVT = N->getOperand(0).getValueType();
13430 SDValue N0 = N->getOperand(0);
13437 SDValue N1 = N0->getOperand(0);
13447 EVT MemVT = Load->getMemoryVT();
13449 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13468 Load->getChain(), Load->getBasePtr(),
13469 Load->getMemoryVT(), Load->getMemOperand());
13486 Load->getValueType(0), ExtLoad);
13501 unsigned CastOpcode = Cast->getOpcode();
13508 // obfuscated by target-specific operations after legalization. Do not create
13510 EVT VT = Cast->getValueType(0);
13514 SDValue VSel = Cast->getOperand(0);
13525 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13532 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13533 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13541 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13542 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13555 EVT MemVT = LN0->getMemoryVT();
13556 if ((LegalOperations || !LN0->isSimple() ||
13562 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13563 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13566 if (LN0->use_empty())
13571 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13588 for (SDNode *User : N0->users()) {
13589 if (User->getOpcode() == ISD::SETCC) {
13590 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13604 !cast<LoadSDNode>(N0)->isSimple()) &&
13618 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13619 LN0->getBasePtr(), N0.getValueType(),
13620 LN0->getMemOperand());
13644 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13647 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13648 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13655 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13657 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13658 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13659 ExtLoadType, Ld->isExpandingLoad());
13664 // fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13670 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13672 EVT MemoryVT = ALoad->getMemoryVT();
13676 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13681 EVT OrigVT = ALoad->getValueType(0);
13684 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13685 ALoad->getBasePtr(), ALoad->getMemOperand()));
13686 NewALoad->setExtensionType(ExtLoadType);
13697 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13698 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13700 SDValue SetCC = N->getOperand(0);
13707 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13708 EVT VT = N->getValueType(0);
13715 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13716 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
13718 unsigned ShCt = VT.getSizeInBits() - 1;
13724 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
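// --- Illustrative sketch (not part of this file): the "is non-negative" test
// built from a NOT and a shift, for i32 (ShCt = 31), assuming the usual
// arithmetic shift for signed integers.
#include <cstdint>
static int32_t  sextGtMinusOne(int32_t X)  { return ~X >> 31; } // sra (not X), 31
static uint32_t zextGtMinusOne(uint32_t X) { return ~X >> 31; } // srl (not X), 31
// ~X has its sign bit set exactly when X >= 0, so the arithmetic shift gives
// -1/0 (sext of the i1 result) and the logical shift gives 1/0 (zext of it).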
13732 SDValue N0 = N->getOperand(0);
13738 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13739 EVT VT = N->getValueType(0);
13743 // Propagate fast-math-flags.
13744 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13787 // Match a simple, non-extended load that can be converted to a
13788 // legal {z/s}ext-load.
13789 // TODO: Allow widening of an existing {z/s}ext-load?
13792 cast<LoadSDNode>(V)->isSimple() &&
13796 // Non-chain users of this value must either be the setcc in this
13797 // sequence or extends that can be folded into the new {z/s}ext-load.
13798 for (SDUse &Use : V->uses()) {
13806 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13820 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13821 // Here, T can be 1 or -1, depending on the type of the setcc and
13828 // sext(i1 1), that is, -1.
13856 SDValue N0 = N->getOperand(0);
13857 EVT VT = N->getValueType(0);
13871 // fold (sext (sext x)) -> (sext x)
13872 // fold (sext (aext x)) -> (sext x)
13876 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13877 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13883 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13886 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13895 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13896 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13914 if (N0->getFlags().hasNoSignedWrap() ||
13915 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
13932 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
13936 // fold (sext (truncate x)) -> (sextinreg x).
13974 // fold (sext (and/or/xor (load x), cst)) ->
13981 EVT MemVT = LN00->getMemoryVT();
13983 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13989 LN00->getChain(), LN00->getBasePtr(),
13990 LN00->getMemoryVT(),
13991 LN00->getMemOperand());
14009 LN00->getValueType(0), ExtLoad);
14023 // fold (sext x) -> (zext x) if the sign bit is known zero.
14033 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14042 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14051 // fold sext (not i1 X) -> add (zext i1 X), -1
14058 // Returning N0 is a form of in-visit replacement that may have
14080 /// Given an extending node with a pop-count operand, if the target does not
14081 /// support a pop-count in the narrow source type but does support it in the
14082 /// destination type, widen the pop-count to the destination type.
14084 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14085 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14088 SDValue CtPop = Extend->getOperand(0);
14092 EVT VT = Extend->getValueType(0);
14098 // zext (ctpop X) --> ctpop (zext X)
14106 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14108 EVT VT = Extend->getValueType(0);
14112 SDValue Abs = Extend->getOperand(0);
14131 SDValue N0 = N->getOperand(0);
14132 EVT VT = N->getValueType(0);
14146 // fold (zext (zext x)) -> (zext x)
14147 // fold (zext (aext x)) -> (zext x)
14151 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14155 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14156 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14161 // fold (zext (truncate x)) -> (zext x) or
14162 // (zext (truncate x)) -> (truncate x)
14182 // fold (zext (truncate x)) -> (and x, mask)
14184 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14185 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14199 if (N->getFlags().hasNonNeg()) {
14205 if (N0->getFlags().hasNoSignedWrap() ||
14206 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14230 // possibly over several sub-vectors.
14255 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14272 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14290 // fold (zext (and/or/xor (load x), cst)) ->
14299 EVT MemVT = LN00->getMemoryVT();
14301 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14307 EVT LoadResultTy = AndC->getValueType(0);
14318 LN00->getChain(), LN00->getBasePtr(),
14319 LN00->getMemoryVT(),
14320 LN00->getMemOperand());
14338 LN00->getValueType(0), ExtLoad);
14346 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14360 // Propagate fast-math-flags.
14361 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14376 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14393 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14400 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14404 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14414 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14416 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14422 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
14450 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14451 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14460 SDValue N0 = N->getOperand(0);
14461 EVT VT = N->getValueType(0);
14471 // fold (aext (aext x)) -> (aext x)
14472 // fold (aext (zext x)) -> (zext x)
14473 // fold (aext (sext x)) -> (sext x)
14478 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14482 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14483 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14484 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14490 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14491 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14508 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14520 // fold (aext (load x)) -> (aext (truncate (extload x)))
14539 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14540 LN0->getBasePtr(), N0.getValueType(),
14541 LN0->getMemOperand());
14558 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14559 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14560 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14564 ISD::LoadExtType ExtType = LN0->getExtensionType();
14565 EVT MemVT = LN0->getMemoryVT();
14568 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14569 MemVT, LN0->getMemOperand());
14578 // Propagate fast-math-flags.
14579 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14582 // aext(setcc) -> vsetcc
14583 // aext(setcc) -> truncate(vsetcc)
14584 // aext(setcc) -> aext(vsetcc)
14598 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14606 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14610 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14614 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14628 unsigned Opcode = N->getOpcode();
14629 SDValue N0 = N->getOperand(0);
14630 SDValue N1 = N->getOperand(1);
14631 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14633 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14635 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14643 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14644 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14647 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14652 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14662 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14667 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14677 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14678 SDValue N0 = N->getOperand(0);
14680 // Fold (assertalign (assertalign x, AL0), AL1) ->
14684 std::max(AL, AAN->getAlign()));
14717 unsigned Opc = N->getOpcode();
14720 SDValue N0 = N->getOperand(0);
14721 EVT VT = N->getValueType(0);
14734 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
14741 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14743 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14750 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14755 ShAmt = N1C->getZExtValue();
14756 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14761 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14766 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14767 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14768 LN->getExtensionType() != ExtType)
14771 // An AND with a constant mask is the same as a truncate + zero-extend.
14772 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14776 const APInt &Mask = AndC->getAPIntValue();
// (undocumented) reasons. Maybe the intent was to guard the AND-masking
// check below? And maybe it could be non-profitable to do the transform in
14815 ShAmt = SRL1C->getZExtValue();
14816 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
// Because an SRL must be assumed to *need* to zero-extend the high bits
14823 if (LN->getExtensionType() == ISD::SEXTLOAD)
14829 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14831 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14837 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14842 SDNode *Mask = *(SRL->user_begin());
14843 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14844 isa<ConstantSDNode>(Mask->getOperand(1))) {
14846 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14875 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
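// An illustrative instance (little-endian, shift amount below the narrow
// width):
//   (i32 (truncate (shl (i64 (load x)), 8))) --> (shl (i32 (load x)), 8)
// The truncated result depends only on the low bits of the wide load, so a
// narrow load followed by the same shift is equivalent.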
14881 ShLeftAmt = N01->getZExtValue();
14893 if (!LN0->isSimple() ||
14899 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14901 return LVTStoreBits - EVTStoreBits - ShAmt;
14913 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
14919 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14920 LN0->getPointerInfo().getWithOffset(PtrOff),
14921 LN0->getOriginalAlign(),
14922 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14924 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14925 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14926 LN0->getOriginalAlign(),
14927 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14962 SDValue N0 = N->getOperand(0);
14963 SDValue N1 = N->getOperand(1);
14964 EVT VT = N->getValueType(0);
14965 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14974 // fold (sext_in_reg c1) -> c1
14983 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14985 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14988 // fold (sext_in_reg (sext x)) -> (sext x)
14989 // fold (sext_in_reg (aext x)) -> (sext x)
15001 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15019 // fold (sext_in_reg (zext x)) -> (sext x)
15028 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15029 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15037 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15038 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15042 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15043 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15044 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15047 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15051 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15057 // fold (sext_inreg (extload x)) -> (sextload x)
15062 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15063 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15068 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15069 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15076 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15078 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15079 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15083 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15084 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15090 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15093 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15094 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15097 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15098 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15099 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15106 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15108 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15110 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15111 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15114 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15115 GN0->getIndexType(), ISD::SEXTLOAD);
15134 // -> (extract_subvector (signext iN_v to iM))
15138 EVT InnerExtVT = InnerExt->getValueType(0);
15139 SDValue Extendee = InnerExt->getOperand(0);
15157 unsigned InregOpcode = N->getOpcode();
15160 SDValue Src = N->getOperand(0);
15161 EVT VT = N->getValueType(0);
// Profitability check: our operand must be a one-use CONCAT_VECTORS.
// FIXME: the one-use check may be overly restrictive.
15187 SDValue N0 = N->getOperand(0);
15188 EVT VT = N->getValueType(0);
15194 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15213 EVT VT = N->getValueType(0);
15214 SDValue N0 = N->getOperand(0);
15305 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
15330 SDValue N0 = N->getOperand(0);
15331 EVT VT = N->getValueType(0);
15340 // fold (truncate (truncate x)) -> (truncate x)
15348 // fold (truncate c1) -> c1
15352 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
15367 // Try to narrow a truncate-of-sext_in_reg to the destination type:
15368 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
15373 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
15381 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
15384 // Fold extract-and-trunc into a narrow extract. For example:
15387 // -- becomes --
15395 N0->hasOneUse()) {
15396 EVT TrTy = N->getValueType(0);
15401 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
15421 SDValue EltNo = Src->getOperand(1);
15423 int Elt = EltNo->getAsZExtVal();
15425 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
15433 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
15447 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15471 // Attempt to pre-truncate BUILD_VECTOR sources.
15479 for (const SDValue &Op : N0->op_values()) {
15486 // trunc (splat_vector x) -> splat_vector (trunc x)
15492 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15513 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
15527 // fold (truncate (load x)) -> (smaller load x)
15528 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15537 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15539 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15540 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15547 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// Stop if more than one member is non-undef.
15593 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15600 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15606 // Simplify the operands using demanded-bits information.
15610 // fold (truncate (extract_subvector(ext x))) ->
15619 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15621 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15629 // Narrow a suitable binary operation with a non-opaque constant operand by
15630 // moving it ahead of the truncate. This is limited to pre-legalization
15643 // TODO: We already restricted this to pre-legalization, but for vectors
15645 // Target-specific changes are likely needed to avoid regressions here.
15655 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15656 // (trunc uaddo_carry(X, Y, Carry)) ->
15662 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15665 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
// Truncate the USUBSAT only if LHS is a known zero-extension, it's not
15688 SDValue Elt = N->getOperand(i);
15694 /// build_pair (load, load) -> load
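/// For example, assuming the two i32 loads are consecutive, suitably aligned,
/// and little-endian:
///   (i64 build_pair (i32 load p), (i32 load p+4)) --> (i64 load p)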
15697 assert(N->getOpcode() == ISD::BUILD_PAIR);
15709 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15710 LD1->getAddressSpace() != LD2->getAddressSpace())
15714 EVT LD1VT = LD1->getValueType(0);
15719 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15720 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15721 LD1->getPointerInfo(), LD1->getAlign());
15727 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15728 // and Lo parts; on big-endian machines it doesn't.
15735 // IEEE754-compliant FP logic, we're done.
15736 EVT VT = N->getValueType(0);
15737 SDValue N0 = N->getOperand(0);
15780 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15781 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15782 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15786 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15800 SDValue N0 = N->getOperand(0);
15801 EVT VT = N->getValueType(0);
15816 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15817 cast<BuildVectorSDNode>(N0)->isConstant())
// an fp -> int or int -> fp conversion and that the resulting operation will
15837 // (conv (conv x, t1), t2) -> (conv x, t2)
15841 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15849 V->hasOneUse());
15857 // fold (conv (load x)) -> (load (conv*)x)
15868 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15873 *LN0->getMemOperand())) {
15875 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15876 LN0->getMemOperand());
15885 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15886 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15889 // fold (bitcast (fneg x)) ->
15893 // fold (bitcast (fabs x)) ->
15899 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15937 // fold (bitconvert (fcopysign cst, x)) ->
15943 // fold (bitcast (fcopysign cst, x)) ->
15948 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15968 DAG.getConstant(OrigXWidth-VTWidth, DL,
16011 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16024 // Remove double bitcasts from shuffles - this is often a legacy of
16027 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16029 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16048 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16049 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16056 for (int M : SVN->getMask())
16058 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16070 EVT VT = N->getValueType(0);
16075 SDValue N0 = N->getOperand(0);
16086 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16089 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
16090 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16091 // the freeze through to the operands that are not guaranteed non-poison.
16092 // NOTE: we will strip poison-generating flags, so ignore them here.
16095 N0->getNumValues() != 1 || !N0->hasOneUse())
16119 for (const SDValue &Op : N0->op_values())
16128 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16130 /*Depth*/ 1))
16139 // Multiple maybe-poison ops when not allowed - bail out.
// it could create undef or poison due to its poison-generating flags.
16145 // So not finding any maybe-poison operands is fine.
16158 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16176 if (N->getOpcode() == ISD::DELETED_NODE)
16180 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16181 N0 = N->getOperand(0);
16185 SmallVector<SDValue> Ops(N0->ops());
// Special-handle ISD::UNDEF: each single one of them can be its own thing.
16196 SVN->getMask());
16199 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
16210 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
16219 // type, convert each element. This handles FP<->INT cases.
16222 for (SDValue Op : BV->op_values()) {
16231 BV->getValueType(0).getVectorNumElements());
16267 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
16283 // Returns true if floating point contraction is allowed on the FMUL-SDValue
16289 N->getFlags().hasAllowContract();
16294 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
16300 SDValue N0 = N->getOperand(0);
16301 SDValue N1 = N->getOperand(1);
16302 EVT VT = N->getValueType(0);
16309 // Floating-point multiply-add with intermediate rounding.
16314 // Floating-point multiply-add without intermediate rounding.
16326 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16329 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
16352 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16357 if (N0->use_size() > N1->use_size())
16361 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
16362 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
16367 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
16369 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
16374 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
16375 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
16377 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
16379 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
16383 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16396 SDValue FMul = TmpFMA->getOperand(2);
16407 TmpFMA = TmpFMA->getOperand(2);
16413 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
16426 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
16443 // -> (fma x, y, (fma (fpext u), (fpext v), z))
16467 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16468 // FIXME: This turns two single-precision and one double-precision
16469 // operation into two double-precision operations, which might not be
16496 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16512 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16513 // FIXME: This turns two single-precision and one double-precision
16514 // operation into two double-precision operations, which might not be
16537 SDValue N0 = N->getOperand(0);
16538 SDValue N1 = N->getOperand(1);
16539 EVT VT = N->getValueType(0);
16546 // Floating-point multiply-add with intermediate rounding.
16551 // Floating-point multiply-add without intermediate rounding.
16560 const SDNodeFlags Flags = N->getFlags();
16565 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16581 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16584 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16586 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16594 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16597 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16609 (N0->use_size() > N1->use_size())) {
16610 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16613 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16617 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16620 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16625 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
16627 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16638 // -> (fma (fpext x), (fpext y), (fneg z))
16653 // -> (fma (fneg (fpext y)), (fpext z), x)
16670 // -> (fneg (fma (fpext x), (fpext y), z))
16673 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
// -> (fneg (fma (fpext x), (fpext y), z))
16697 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16718 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16732 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// -> (fma x, y, (fma u, v, (fneg z)))
16737 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16747 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16750 N1->hasOneUse() && NoSignedZero) {
// -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16763 if (isFusedOp(N0) && N0->hasOneUse()) {
16782 // -> (fma (fpext x), (fpext y),
16784 // FIXME: This turns two single-precision and one double-precision
16785 // operation into two double-precision operations, which might not be
16808 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16810 N1->hasOneUse()) {
16830 // -> (fma (fneg (fpext y)), (fpext z),
16832 // FIXME: This turns two single-precision and one double-precision
16833 // operation into two double-precision operations, which might not be
16866 SDValue N0 = N->getOperand(0);
16867 SDValue N1 = N->getOperand(1);
16868 EVT VT = N->getValueType(0);
16871 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16881 // Floating-point multiply-add without intermediate rounding.
16887 // Floating-point multiply-add with intermediate rounding. This can result
16900 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16901 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16903 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16905 if (C->isExactlyValue(+1.0))
16908 if (C->isExactlyValue(-1.0))
16921 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16922 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16923 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16924 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16926 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16928 if (C0->isExactlyValue(+1.0))
16932 if (C0->isExactlyValue(-1.0))
16938 if (C1->isExactlyValue(+1.0))
16941 if (C1->isExactlyValue(-1.0))
16960 // FADD -> FMA combines:
16970 SDValue N0 = N->getOperand(0);
16971 SDValue N1 = N->getOperand(1);
16974 EVT VT = N->getValueType(0);
16977 SDNodeFlags Flags = N->getFlags();
16980 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16983 // fold (fadd c1, c2) -> c1 + c2
16996 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16998 if (N1C && N1C->isZero())
16999 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
17005 // fold (fadd A, (fneg B)) -> (fsub A, B)
17011 // fold (fadd (fneg A), B) -> (fsub B, A)
17021 return C && C->isExactlyValue(-2.0);
17024 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17030 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
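// Quick sanity check with A = 10.0 and B = 3.0:
//   fadd (fmul 3.0, -2.0), 10.0 == -6.0 + 10.0 == 4.0
//   fsub 10.0, (fadd 3.0, 3.0)  == 10.0 - 6.0  == 4.0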
17043 // If allowed, fold (fadd (fneg x), x) -> 0.0
17047 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17058 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17073 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17080 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17094 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17101 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17113 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17123 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17131 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17141 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17145 } // enable-unsafe-fp-math
17147 // FADD -> FMA combines:
17157 SDValue Chain = N->getOperand(0);
17158 SDValue N0 = N->getOperand(1);
17159 SDValue N1 = N->getOperand(2);
17160 EVT VT = N->getValueType(0);
17161 EVT ChainVT = N->getValueType(1);
17165 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17173 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17184 SDValue N0 = N->getOperand(0);
17185 SDValue N1 = N->getOperand(1);
17188 EVT VT = N->getValueType(0);
17191 const SDNodeFlags Flags = N->getFlags();
17194 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17197 // fold (fsub c1, c2) -> c1-c2
17209 // (fsub A, 0) -> A
17210 if (N1CFP && N1CFP->isZero()) {
17211 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17218 // (fsub x, x) -> 0.0
17223 // (fsub -0.0, N1) -> -N1
17224 if (N0CFP && N0CFP->isZero()) {
17225 if (N0CFP->isNegative() ||
17227 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17245 // X - (X + Y) -> -Y
17246 if (N0 == N1->getOperand(0))
17247 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17248 // X - (Y + X) -> -Y
17249 if (N0 == N1->getOperand(1))
17250 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17253 // fold (fsub A, (fneg B)) -> (fadd A, B)
17258 // FSUB -> FMA combines:
17269 // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17271 // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
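// A worked f32 example (23 mantissa bits), assuming the exponent neither
// overflows nor underflows: multiplying C by 8.0 adds log2(8) == 3 to the
// biased exponent field, i.e.
//   (fmul C, 8.0) --> (bitcast_to_FP (add (bitcast_to_INT C), 3 << 23))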
17283 EVT VT = N->getValueType(0);
17288 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
17291 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
17292 Pow2Op = N->getOperand(1 - ConstOpIdx);
17308 const APFloat &APF = CFP->getValueAPF();
17319 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17322 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17328 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
17367 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
17374 SDValue N0 = N->getOperand(0);
17375 SDValue N1 = N->getOperand(1);
17377 EVT VT = N->getValueType(0);
17380 const SDNodeFlags Flags = N->getFlags();
17383 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17386 // fold (fmul c1, c2) -> c1*c2
17404 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
17418 // Match a special-case: we convert X * 2.0 into fadd.
17419 // fmul (fadd X, X), C -> fmul X, 2.0 * C
17427 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
17433 // fold (fmul X, 2.0) -> (fadd X, X)
17434 if (N1CFP && N1CFP->isExactlyValue(+2.0))
17437 // fold (fmul X, -1.0) -> (fsub -0.0, X)
17438 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
17441 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
17445 // -N0 * -N1 --> N0 * N1
17461 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17462 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
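// Rationale for the first fold: when X > 0.0 the select yields -1.0 and the
// product is -X; otherwise it yields 1.0 and the product is X, which already
// equals -|X| for non-positive X (modulo NaN and signed-zero caveats).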
17477 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17478 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17495 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17499 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17507 // FMUL -> FMA combines:
17513 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17522 SDValue N0 = N->getOperand(0);
17523 SDValue N1 = N->getOperand(1);
17524 SDValue N2 = N->getOperand(2);
17527 EVT VT = N->getValueType(0);
17536 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
17539 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17557 if (N0CFP && N0CFP->isZero())
17559 if (N1CFP && N1CFP->isZero())
17564 if (N0CFP && N0CFP->isExactlyValue(1.0))
17566 if (N1CFP && N1CFP->isExactlyValue(1.0))
17569 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17575 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17577 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17586 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17596 // (fma x, -1, y) -> (fadd (fneg x), y)
17599 if (N1CFP->isExactlyValue(1.0))
17602 if (N1CFP->isExactlyValue(-1.0) &&
// fma (fneg x), K, y -> fma x, -K, y
17613 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17621 // (fma x, c, x) -> (fmul x, (c+1))
17628 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17632 DAG.getConstantFP(-1.0, DL, VT)));
17636 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17637 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17646 SDValue N0 = N->getOperand(0);
17647 SDValue N1 = N->getOperand(1);
17648 SDValue N2 = N->getOperand(2);
17649 EVT VT = N->getValueType(0);
17661 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17667 // TODO: Limit this transform based on optsize/minsize - it always creates at
17671 const SDNodeFlags Flags = N->getFlags();
17675 // Skip if current node is a reciprocal/fneg-reciprocal.
17676 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17678 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17688 EVT VT = N->getValueType(0);
17692 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17698 for (auto *U : N1->users()) {
17699 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17701 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17702 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17703 U->getFlags().hasAllowReassociation() &&
17704 U->getFlags().hasNoSignedZeros())
17709 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17723 // Dividend / Divisor -> Dividend * Reciprocal
17725 SDValue Dividend = U->getOperand(0);
17731 // In the absence of fast-math-flags, this user node is always the
17740 SDValue N0 = N->getOperand(0);
17741 SDValue N1 = N->getOperand(1);
17742 EVT VT = N->getValueType(0);
17745 SDNodeFlags Flags = N->getFlags();
17748 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17751 // fold (fdiv c1, c2) -> c1/c2
17766 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17770 const APFloat &N1APF = N1CFP->getValueAPF();
17790 // into a target-specific square root estimate instruction.
17825 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17832 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17833 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17846 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17861 // Fold X/Sqrt(X) -> Sqrt(X)
17867 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17890 SDValue N0 = N->getOperand(0);
17891 SDValue N1 = N->getOperand(1);
17892 EVT VT = N->getValueType(0);
17893 SDNodeFlags Flags = N->getFlags();
17897 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17900 // fold (frem c1, c2) -> fmod(c1,c2)
// Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, providing N1 is an integer
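// Worked example: frem(7.5, 2.0) => 7.5 - trunc(7.5 / 2.0) * 2.0
//                                == 7.5 - 3.0 * 2.0 == 1.5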
17933 SDNodeFlags Flags = N->getFlags();
17942 SDValue N0 = N->getOperand(0);
17948 // transform the fdiv, we may produce a sub-optimal estimate sequence
17954 /// copysign(x, fp_extend(y)) -> copysign(x, y)
17955 /// copysign(x, fp_round(y)) -> copysign(x, y)
17958 // Always fold no-op FP casts.
17973 SDValue N1 = N->getOperand(1);
17977 EVT N1VT = N1->getValueType(0);
17978 EVT N1Op0VT = N1->getOperand(0).getValueType();
17983 SDValue N0 = N->getOperand(0);
17984 SDValue N1 = N->getOperand(1);
17985 EVT VT = N->getValueType(0);
17988 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17992 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17993 const APFloat &V = N1C->getValueAPF();
17994 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17995 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
18006 // copysign(fabs(x), y) -> copysign(x, y)
18007 // copysign(fneg(x), y) -> copysign(x, y)
18008 // copysign(copysign(x,z), y) -> copysign(x, y)
18013 // copysign(x, abs(y)) -> abs(x)
18017 // copysign(x, copysign(y,z)) -> copysign(x, z)
18021 // copysign(x, fp_extend(y)) -> copysign(x, y)
18022 // copysign(x, fp_round(y)) -> copysign(x, y)
18032 // We only take the non-sign bits from the value operand
18041 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18050 EVT VT = N->getValueType(0);
18051 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18052 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18053 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18054 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
// pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
18059 SDNodeFlags Flags = N->getFlags();
18071 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18077 // power-of-2 fractional exponents.
18078 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18079 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18081 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18082 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18083 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18084 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18088 SDNodeFlags Flags = N->getFlags();
18104 // pow(X, 0.25) --> sqrt(sqrt(X))
18106 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18110 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18120 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18121 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18123 // FIXME: We should be able to use node-level FMF here.
18125 EVT VT = N->getValueType(0);
18131 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
18132 SDValue N0 = N->getOperand(0);
18133 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18137 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18145 SDValue N0 = N->getOperand(0);
18146 EVT VT = N->getValueType(0);
18154 // fold (sint_to_fp c1) -> c1fp
18155 // ...but only if the target supports immediate floating-point values
18170 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18174 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18177 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18193 SDValue N0 = N->getOperand(0);
18194 EVT VT = N->getValueType(0);
18202 // fold (uint_to_fp c1) -> c1fp
18203 // ...but only if the target supports immediate floating-point values
18217 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18231 SDValue N0 = N->getOperand(0);
18232 EVT VT = N->getValueType(0);
18240 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18251 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18272 SDValue N0 = N->getOperand(0);
18273 EVT VT = N->getValueType(0);
18276 // fold (fp_to_sint undef) -> undef
18280 // fold (fp_to_sint c1fp) -> c1
18288 SDValue N0 = N->getOperand(0);
18289 EVT VT = N->getValueType(0);
18292 // fold (fp_to_uint undef) -> undef
18296 // fold (fp_to_uint c1fp) -> c1
18304 SDValue N0 = N->getOperand(0);
18305 EVT VT = N->getValueType(0);
18307 // fold (lrint|llrint undef) -> undef
18308 // fold (lround|llround undef) -> undef
18312 // fold (lrint|llrint c1fp) -> c1
18313 // fold (lround|llround c1fp) -> c1
18315 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
18322 SDValue N0 = N->getOperand(0);
18323 SDValue N1 = N->getOperand(1);
18324 EVT VT = N->getValueType(0);
18327 // fold (fp_round c1fp) -> c1fp
18331 // fold (fp_round (fp_extend x)) -> x
18335 // fold (fp_round (fp_round x)) -> (fp_round x)
18337 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
18340 // Avoid folding legal fp_rounds into non-legal ones.
18348 // instructions from f32 or f64. Moreover, the first (value-preserving)
18356 // single-step fp_round we want to fold to.
18365 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
18370 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
18386 SDValue N0 = N->getOperand(0);
18387 EVT VT = N->getValueType(0);
18395 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
18398 // fold (fp_extend c1fp) -> c1fp
18402 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
18407 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
18417 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
18422 LN0->getChain(),
18423 LN0->getBasePtr(), N0.getValueType(),
18424 LN0->getMemOperand());
18441 SDValue N0 = N->getOperand(0);
18442 EVT VT = N->getValueType(0);
18444 // fold (fceil c1) -> fceil(c1)
18452 SDValue N0 = N->getOperand(0);
18453 EVT VT = N->getValueType(0);
18455 // fold (ftrunc c1) -> ftrunc(c1)
18459 // fold ftrunc (known rounded int x) -> x
18477 SDValue N0 = N->getOperand(0);
18479 // fold (ffrexp c1) -> ffrexp(c1)
18481 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18486 SDValue N0 = N->getOperand(0);
18487 EVT VT = N->getValueType(0);
18489 // fold (ffloor c1) -> ffloor(c1)
18497 SDValue N0 = N->getOperand(0);
18498 EVT VT = N->getValueType(0);
18509 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18515 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18527 SDValue N0 = N->getOperand(0);
18528 SDValue N1 = N->getOperand(1);
18529 EVT VT = N->getValueType(0);
18530 const SDNodeFlags Flags = N->getFlags();
18531 unsigned Opc = N->getOpcode();
18543 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18546 const APFloat &AF = N1CFP->getValueAPF();
18548 // minnum(X, nan) -> X
18549 // maxnum(X, nan) -> X
18550 // minimum(X, nan) -> nan
18551 // maximum(X, nan) -> nan
18553 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18558 // minnum(X, -inf) -> -inf
18559 // maxnum(X, +inf) -> +inf
18560 // minimum(X, -inf) -> -inf if nnan
18561 // maximum(X, +inf) -> +inf if nnan
18563 return N->getOperand(1);
18565 // minnum(X, +inf) -> X if nnan
18566 // maxnum(X, -inf) -> X if nnan
18567 // minimum(X, +inf) -> X
18568 // maximum(X, -inf) -> X
18570 return N->getOperand(0);
18585 SDValue N0 = N->getOperand(0);
18586 EVT VT = N->getValueType(0);
18589 // fold (fabs c1) -> fabs(c1)
18593 // fold (fabs (fabs x)) -> (fabs x)
18595 return N->getOperand(0);
18597 // fold (fabs (fneg x)) -> (fabs x)
18598 // fold (fabs (fcopysign x, y)) -> (fabs x)
18609 SDValue Chain = N->getOperand(0);
18610 SDValue N1 = N->getOperand(1);
18611 SDValue N2 = N->getOperand(2);
18615 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18617 N1->getOperand(0), N2, N->getFlags());
18628 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18629 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18630 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18631 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18632 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18639 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18640 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18641 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18642 (Cond == ISD::SETGT && C->isMaxSignedValue());
18643 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18644 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18645 (Cond == ISD::SETUGE && C->isZero()) ||
18646 (Cond == ISD::SETGE && C->isMinSignedValue());
18650 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18652 S0 = S0->getOperand(0);
18656 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18658 S1 = S1->getOperand(0);
18666 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
18667 N->getFlags());
18692 ChainHandle.getValue(), NewN1, N2, N->getFlags());
18722 // SRL constant is equal to the log2 of the AND constant. The back-end is
18731 const APInt &AndConst = AndOp1->getAsAPIntVal();
18734 Op1->getAsAPIntVal() == AndConst.logBase2()) {
// Transform (brcond (xor x, y)) -> (brcond (setcc x, y, ne))
// Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
// Returning N is a form of in-visit replacement that may invalidate
18769 SDValue Op0 = N->getOperand(0);
18770 SDValue Op1 = N->getOperand(1);
18774 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18778 Op0 = N->getOperand(0);
18779 Op1 = N->getOperand(1);
18787 // it would introduce illegal operations post-legalization as this can
18788 // result in infinite looping between converting xor->setcc here, and
18789 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
18802 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18803 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18813 CondLHS, CondRHS, CC->get(), SDLoc(N),
18820 N->getOperand(0), Simp.getOperand(2),
18822 N->getOperand(4));
18831 if (LD->isIndexed())
18833 EVT VT = LD->getMemoryVT();
18836 Ptr = LD->getBasePtr();
18838 if (ST->isIndexed())
18840 EVT VT = ST->getMemoryVT();
18843 Ptr = ST->getBasePtr();
18846 if (LD->isIndexed())
18848 EVT VT = LD->getMemoryVT();
18852 Ptr = LD->getBasePtr();
18855 if (ST->isIndexed())
18857 EVT VT = ST->getMemoryVT();
18861 Ptr = ST->getBasePtr();
18870 /// Try turning a load/store into a pre-indexed load/store when the base
18889 Ptr->hasOneUse())
18899 // Backends without true r+i pre-indexed forms may need to pass a
18912 // Try turning it into a pre-indexed load / store except when:
18927 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18928 : cast<StoreSDNode>(N)->getValue();
18935 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18950 for (SDUse &Use : BasePtr->uses()) {
18960 if (Use.getUser()->getOpcode() != ISD::ADD &&
18961 Use.getUser()->getOpcode() != ISD::SUB) {
18966 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
18987 for (SDNode *User : Ptr->users()) {
19019 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19038 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19040 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
19048 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19049 // indexed load/store and the expression that needs to be re-written.
// t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
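// For instance, when t1 comes from a PRE_INC form (t1 = base + offset1) and
// the other use computes t0 = base + offset0, all of x0, x1, y0 and y1 are 1,
// and the rewrite becomes t0 = (offset0 - offset1) + t1.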
19054 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
19055 const APInt &Offset0 = CN->getAPIntValue();
19056 const APInt &Offset1 = Offset->getAsAPIntVal();
19057 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19058 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19059 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19060 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19065 if (X0 < 0) CNV = -CNV;
19067 else CNV = CNV - Offset1;
19072 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19077 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
19096 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19111 for (SDNode *User : BasePtr->users()) {
19131 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19132 for (SDNode *UserUser : User->users())
19148 Ptr->hasOneUse())
19151 // Try turning it into a post-indexed load / store except when
19158 for (SDNode *Op : Ptr->users()) {
/// post-indexed load/store. The transformation folds the add/subtract into the
19209 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19229 /// Return the base-pointer arithmetic from an indexed \p LD.
19231 ISD::MemIndexedMode AM = LD->getAddressingMode();
19233 SDValue BP = LD->getOperand(1);
19234 SDValue Inc = LD->getOperand(2);
19240 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19244 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19245 ConstInc->getValueType(0));
19259 EVT STMemType = ST->getMemoryVT();
19282 EVT LDMemType = LD->getMemoryVT();
19283 EVT LDType = LD->getValueType(0);
19285 "Attempting to extend value of non-matching type");
19289 switch (LD->getExtensionType()) {
19309 SDValue Chain = LD->getOperand(0);
19313 Chain = Chain->getOperand(0);
19319 for (SDValue Op : Chain->ops()) {
19348 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
19350 SDValue Chain = LD->getOperand(0);
19355 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
19358 EVT LDType = LD->getValueType(0);
19359 EVT LDMemType = LD->getMemoryVT();
19360 EVT STMemType = ST->getMemoryVT();
19361 EVT STType = ST->getValue().getValueType();
19369 // no cost-benefit analysis to prove it's worth it.
19377 // analysis on big-endian platforms it seems better to bail out for now.
19387 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
19389 8 -
19403 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
19404 if (LD->isIndexed()) {
19421 // Simple case: Direct non-truncating forwarding
19423 return ReplaceLd(LD, ST->getValue(), Chain);
19426 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
19432 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
19437 // Handle some cases for big-endian that would be Offset 0 and handled for
19438 // little-endian.
19439 SDValue Val = ST->getValue();
19451 if (LD->getBasePtr().isUndef() || Offset != 0)
19474 if (Val->use_empty())
19481 SDValue Chain = LD->getChain();
19482 SDValue Ptr = LD->getBasePtr();
19488 if (LD->isSimple()) {
19489 if (N->getValueType(1) == MVT::Other) {
19491 if (!N->hasAnyUseOfValue(0)) {
19498 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19504 if (N->use_empty())
19511 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19519 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19520 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19522 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19528 Index = DAG.getUNDEF(N->getValueType(1));
19529 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19548 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19549 !LD->isAtomic()) {
19551 if (*Alignment > LD->getAlign() &&
19552 isAligned(*Alignment, LD->getSrcValueOffset())) {
19554 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19555 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19556 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19564 if (LD->isUnindexed()) {
19565 // Walk up chain skipping non-aliasing memory nodes.
19573 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19574 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19575 BetterChain, Ptr, LD->getMemOperand());
19577 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19578 LD->getValueType(0),
19579 BetterChain, Ptr, LD->getMemoryVT(),
19580 LD->getMemOperand());
19636 EVT TruncType = LS.Inst->getValueType(0);
19639 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19649 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19650 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19720 // - Start from the truncated value.
19721 // - Zero extend to the desired bit width.
19722 // - Shift left.
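// E.g. for (i8 (truncate (srl (i32 (load p)), 16))) the used bits are
// (zext 0xFF) << 16 == 0x00FF0000, i.e. only the third byte of the load.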
19724 unsigned BitWidth = Origin->getValueSizeInBits(0);
19726 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19728 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19746 LLVMContext &Ctxt = *DAG->getContext();
19752 Align Alignment = Origin->getAlign();
19766 if (!Origin->getOffset().isUndef())
19769 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19782 EVT PtrType = Origin->getBasePtr().getValueType();
19795 EVT TruncateType = Inst->getValueType(0);
19808 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19811 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19812 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19820 Offset = TySizeInBytes - Offset - getLoadedSize();
19827 /// \pre this->Inst && this->Origin are valid Instructions and this
19831 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19832 const SDValue &OldBaseAddr = Origin->getBasePtr();
19841 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19842 DAG->getConstant(Offset, DL, ArithType));
19850 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19851 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19852 Origin->getMemOperand()->getFlags());
19855 EVT FinalType = Inst->getValueType(0);
19858 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19867 if (!Inst || !Inst->hasOneUse())
19869 SDNode *User = *Inst->user_begin();
19870 if (User->getOpcode() != ISD::BITCAST)
19873 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19874 EVT ResVT = User->getValueType(0);
19876 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
19878 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
19879 User->getOperand(0)->isDivergent());
19883 // At this point, we know that we perform a cross-register-bank copy.
19885 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19888 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19894 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19895 Origin->getAddressSpace(), getAlign(),
19896 Origin->getMemOperand()->getFlags(), &IsFast) ||
19905 if (Inst->getValueType(0) != getLoadedType())
19961 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19976 EVT LoadedType = First->getLoadedType();
19979 if (LoadedType != Second->getLoadedType())
19990 if (First->getAlign() < RequiredAlignment)
19998 --GlobalLSCost.Loads;
20059 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20060 !LD->getValueType(0).isInteger())
20066 if (LD->getValueType(0).isScalableVector())
20071 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20078 for (SDUse &U : LD->uses()) {
20087 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20088 isa<ConstantSDNode>(User->getOperand(1))) {
20089 Shift = User->getConstantOperandVal(1);
20090 User = *User->user_begin();
20095 if (User->getOpcode() != ISD::TRUNCATE)
// The width of the type must be a power of 2 and greater than 8 bits.
// Moreover, if we shifted with a non-8-bit multiple, the slice
20102 unsigned Width = User->getValueSizeInBits(0);
20140 assert(SliceInst->getOpcode() == ISD::LOAD &&
20160 if (V->getOpcode() != ISD::AND ||
20161 !isa<ConstantSDNode>(V->getOperand(1)) ||
20162 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20166 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20167 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20178 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20191 NotMaskLZ -= 64-V.getValueSizeInBits();
20193 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20198 default: return Result; // All one mask, or 5-byte mask.
20209 else if (Chain->getOpcode() == ISD::TokenFactor &&
20212 if (!LD->isOperandOf(Chain.getNode()))
20231 SelectionDAG &DAG = DC->getDAG();
20246 if (DC->isTypeLegal(VT))
20255 if (St->isIndexed())
20259 if (St->getMemOperand() &&
20261 *St->getMemOperand()))
20278 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
20280 SDValue Ptr = St->getBasePtr();
20288 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
20289 St->getPointerInfo().getWithOffset(StOffset),
20290 VT, St->getOriginalAlign());
20296 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
20297 St->getPointerInfo().getWithOffset(StOffset),
20298 St->getOriginalAlign());
20307 if (!ST->isSimple())
20310 SDValue Chain = ST->getChain();
20311 SDValue Value = ST->getValue();
20312 SDValue Ptr = ST->getBasePtr();
20315 if (ST->isTruncatingStore() || VT.isVector())
20355 if (LD->getBasePtr() != Ptr ||
20356 LD->getPointerInfo().getAddrSpace() !=
20357 ST->getPointerInfo().getAddrSpace())
20363 APInt Imm = N1->getAsAPIntVal();
20373 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
20374 unsigned NewBW = NextPowerOf2(MSB - LSB);
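// Worked example, assuming LSB is the lowest changed bit rounded down to a
// byte boundary: for Imm == 0x00FF0000 this gives MSB == (24 - 1) | 7 == 23
// and LSB == 16, so NewBW == NextPowerOf2(7) == 8 and a one-byte-wide
// load/op/store sequence would suffice.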
// If we come this far, NewVT/NewBW reflect a power-of-2-sized type that is
20412 ? VTStoreSize - NewBW - ShAmt
20418 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20420 LD->getAddressSpace(), NewAlign,
20421 LD->getMemOperand()->getFlags(), &IsFast) &&
20432 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20436 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
20437 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
20438 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20443 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
20462 SDValue Value = ST->getValue();
20466 EVT VT = LD->getMemoryVT();
20467 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
20468 LD->isNonTemporal() || ST->isNonTemporal() ||
20469 LD->getPointerInfo().getAddrSpace() != 0 ||
20470 ST->getPointerInfo().getAddrSpace() != 0)
20487 *LD->getMemOperand(), &FastLD) ||
20489 *ST->getMemOperand(), &FastST) ||
20493 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
20494 LD->getBasePtr(), LD->getMemOperand());
20496 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
20497 ST->getBasePtr(), ST->getMemOperand());
20511 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
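// E.g. with c1 == 3 and c2 == 5:
//   (mul (add x, 3), 5) --> (add (mul x, 5), 15)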
20529 if (AddNode->hasOneUse() &&
20534 for (SDNode *User : ConstNode->users()) {
20538 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
20543 if (User->getOperand(0) == ConstNode)
20544 OtherOp = User->getOperand(1).getNode();
20546 OtherOp = User->getOperand(0).getNode();
20551 // User = ConstNode * A <-- visiting User. OtherOp is A.
20553 // AddNode = (A + c1) <-- MulVar is A.
20554 // = AddNode * ConstNode <-- current visiting instruction.
20566 // ... = AddNode * ConstNode <-- current visiting instruction.
20569 // User = OtherOp * ConstNode <-- visiting User.
20574 if (OtherOp->getOpcode() == ISD::ADD &&
20575 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20576 OtherOp->getOperand(0).getNode() == MulVar)
20597 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20598 Chains.push_back(StoreNodes[i].MemNode->getChain());
20608 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20611 if (MMO->getPseudoValue())
20614 if (!MMO->getValue())
20617 const Value *Obj = getUnderlyingObject(MMO->getValue());
20651 Flags = St->getMemOperand()->getFlags();
20652 AAInfo = St->getAAInfo();
20656 if (Flags != St->getMemOperand()->getFlags())
20659 AAInfo = AAInfo.concat(St->getAAInfo());
20676 SDValue Val = St->getValue();
20691 Val = DAG.getConstant(C->getAPIntValue()
20708 SDValue Val = peekThroughBitcasts(St->getValue());
20752 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20755 SDValue Val = St->getValue();
20759 StoreInt |= C->getAPIntValue()
20763 StoreInt |= C->getValueAPF()
20795 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20797 ? FirstInChain->getPointerInfo()
20798 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20799 FirstInChain->getAlign(), *Flags, AAInfo);
20806 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20809 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20811 ? FirstInChain->getPointerInfo()
20812 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20813 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20835 SDValue Val = peekThroughBitcasts(St->getValue());
20840 EVT MemVT = St->getMemoryVT();
20846 LoadVT = Ld->getMemoryVT();
20851 if (!Ld->hasNUsesOfValue(1, 0))
20855 if (!Ld->isSimple() || Ld->isIndexed())
20859 int64_t &Offset) -> bool {
20862 if (!Other->isSimple() || Other->isIndexed())
20864 // Don't mix temporal stores with non-temporal stores.
20865 if (St->isNonTemporal() != Other->isNonTemporal())
20869 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20871 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20872 : Other->getMemoryVT() != MemVT;
20882 if (LoadVT != OtherLd->getMemoryVT())
20885 if (!OtherLd->hasNUsesOfValue(1, 0))
20889 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20891 // Don't mix temporal loads with non-temporal loads.
20892 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20909 if (Other->isTruncatingStore())
20932 // |-------|-------|
20940 SDNode *RootNode = St->getChain().getNode();
20948 SDNode *RootNode) -> bool {
20951 RootCount->second.first == RootNode &&
20952 RootCount->second.second > StoreMergeDependenceLimit;
20971 RootNode = Ldn->getChain().getNode();
20975 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20977 SDNode *User = I->getUser();
20978 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
20979 for (SDUse &U2 : User->uses())
20983 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
20988 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21019 if (N->getOpcode() == ISD::TokenFactor) {
21020 for (SDValue Op : N->ops())
21031 // * Chain (Op 0) -> We have already considered these
21034 // dependency to a load, that has a non-chain dep to
21037 // of chain and non-chain deps, and we need to include
21039 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21040 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21043 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21044 // non-indexed stores). Not constant on all targets (e.g. ARM)
21046 for (const SDValue &Op : N->op_values())
21088 // non-consecutive store memory address.
21095 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21120 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21121 Align FirstStoreAlign = FirstInChain->getAlign();
21129 SDValue StoredVal = ST->getValue();
21132 IsElementZero = C->isZero();
21134 IsElementZero = C->getConstantFPValue()->isNullValue();
21156 *FirstInChain->getMemOperand(), &IsFast) &&
21169 *FirstInChain->getMemOperand(), &IsFast) &&
21186 *FirstInChain->getMemOperand(), &IsFast) &&
21204 // improved or we've dropped a non-zero value. Drop as many
21209 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21213 NumConsecutiveStores -= NumSkip;
21221 NumConsecutiveStores -= NumElem;
21231 NumConsecutiveStores -= NumElem;
21247 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21248 Align FirstStoreAlign = FirstInChain->getAlign();
21263 *FirstInChain->getMemOperand(), &IsFast) &&
21279 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21283 NumConsecutiveStores -= NumSkip;
21292 NumConsecutiveStores -= NumStoresToMerge;
21301 NumConsecutiveStores -= NumStoresToMerge;
21326 SDValue Val = peekThroughBitcasts(St->getValue());
21352 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
21361 if (Offset0 - Offset1 == ElementSizeBytes &&
21369 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21370 Align FirstStoreAlign = FirstInChain->getAlign();
21374 // non-consecutive load memory address. These variables hold the index in
21385 SDValue LoadChain = FirstLoad->getChain();
21388 if (LoadNodes[i].MemNode->getChain() != LoadChain)
21392 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21396 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
21417 *FirstInChain->getMemOperand(), &IsFastSt) &&
21420 *FirstLoad->getMemOperand(), &IsFastLd) &&
21432 *FirstInChain->getMemOperand(), &IsFastSt) &&
21435 *FirstLoad->getMemOperand(), &IsFastLd) &&
21450 *FirstInChain->getMemOperand(), &IsFastSt) &&
21453 *FirstLoad->getMemOperand(), &IsFastLd) &&
21472 Align FirstLoadAlign = FirstLoad->getAlign();
21484 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21485 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21489 NumConsecutiveStores -= NumSkip;
21498 NumConsecutiveStores -= NumElem;
21541 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21542 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21547 "Unexpected type for rotate-able load pair");
21554 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21555 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21562 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21563 FirstLoad->getPointerInfo(), JointMemOpVT,
21566 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21567 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21569 JointMemOpVT, FirstInChain->getAlign(),
21570 FirstInChain->getMemOperand()->getFlags());
21583 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21585 if (Val->use_empty())
21592 NumConsecutiveStores -= NumElem;
21605 EVT MemVT = St->getMemoryVT();
21611 // This function cannot currently deal with non-byte-sized memory sizes.
21618 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21624 // Find potential store merge candidates by searching through chain sub-DAG
21639 bool IsNonTemporalStore = St->isNonTemporal();
21641 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21646 // case that a non-mergeable store is found first, e.g., {p[-2],
21694 if (ST->isTruncatingStore()) {
21695 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21696 ST->getBasePtr(), ST->getMemoryVT(),
21697 ST->getMemOperand());
21699 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21700 ST->getMemOperand());
21705 MVT::Other, ST->getChain(), ReplStore);
21715 SDValue Value = ST->getValue();
21724 SDValue Chain = ST->getChain();
21725 SDValue Ptr = ST->getBasePtr();
21730 // the number of stores. For example, on x86-32 an f64 can be stored in one
21735 switch (CFP->getSimpleValueType(0).SimpleTy) {
21745 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21747 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21750 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21756 ST->isSimple()) ||
21758 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21761 Ptr, ST->getMemOperand());
21764 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21765 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21768 // 64-bit integer store into two 32-bit stores.
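// Worked example (hypothetical values, little-endian): f64 1.0 has the
// bit pattern 0x3FF0000000000000, so Lo = 0x00000000 is stored at Ptr
// and Hi = 0x3FF00000 at Ptr+4 -- two legal i32 stores in place of one
// illegal or expensive f64 store.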
21769 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21775 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21776 AAMDNodes AAInfo = ST->getAAInfo();
21778 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21779 ST->getOriginalAlign(), MMOFlags, AAInfo);
21782 ST->getPointerInfo().getWithOffset(4),
21783 ST->getOriginalAlign(), MMOFlags, AAInfo);
21792 // (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
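// Worked example (hypothetical types): for a v4i32 at p with i == 2,
// the fold emits "store i32 x, p + 8" (offset = 2 * 4 bytes) instead of
// loading the whole vector, inserting, and storing it back.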
21799 SDValue Value = ST->getValue();
21800 SDValue Ptr = ST->getBasePtr();
21801 SDValue Chain = ST->getChain();
21816 if (!Ld || Ld->getBasePtr() != Ptr ||
21817 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21819 Ld->getAddressSpace() != ST->getAddressSpace() ||
21825 Elt.getValueType(), ST->getAddressSpace(),
21826 ST->getAlign(), ST->getMemOperand()->getFlags(),
21831 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21837 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21839 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21844 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21845 ST->getMemOperand()->getFlags());
21850 SDValue Val = ST->getVal();
21852 EVT MemVT = ST->getMemoryVT();
21868 SDValue Chain = ST->getChain();
21869 SDValue Value = ST->getValue();
21870 SDValue Ptr = ST->getBasePtr();
21874 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21875 ST->isUnindexed()) {
21883 if (((!LegalOperations && ST->isSimple()) ||
21886 DAG, *ST->getMemOperand())) {
21888 ST->getMemOperand());
21892 // Turn 'store undef, Ptr' -> nothing.
21893 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
21897 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21898 !ST->isAtomic()) {
21900 if (*Alignment > ST->getAlign() &&
21901 isAligned(*Alignment, ST->getSrcValueOffset())) {
21903 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21904 ST->getMemoryVT(), *Alignment,
21905 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21922 if (ST->isUnindexed()) {
21923 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21930 Chain = ST->getChain();
21934 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21937 !cast<ConstantSDNode>(Value)->isOpaque())) {
21942 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21943 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21945 ST->getMemOperand());
21949 ST->getMemoryVT().getScalarSizeInBits());
21955 // Re-visit the store if anything changed and the store hasn't been merged
21957 // node back to the worklist if necessary, but we also need to re-visit
21959 if (N->getOpcode() != ISD::DELETED_NODE)
21966 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21969 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21970 ST->getMemOperand());
21975 if (!Cst->isOpaque()) {
21976 const APInt &CValue = Cst->getAPIntValue();
21982 ST->getMemoryVT(), ST->getMemOperand());
21989 // TODO: Add big-endian truncate support with test coverage.
21995 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21996 ST->isUnindexed() && ST->isSimple() &&
21997 Ld->getAddressSpace() == ST->getAddressSpace() &&
22012 if (ST->isUnindexed() && ST->isSimple() &&
22013 ST1->isUnindexed() && ST1->isSimple()) {
22014 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22015 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22016 ST->getAddressSpace() == ST1->getAddressSpace()) {
22022 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22023 !ST1->getBasePtr().isUndef() &&
22024 ST->getAddressSpace() == ST1->getAddressSpace()) {
22029 if (ST->getMemoryVT().isScalableVector() ||
22030 ST1->getMemoryVT().isScalableVector()) {
22031 if (ST1->getBasePtr() == Ptr &&
22032 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22033 ST->getMemoryVT().getStoreSize())) {
22034 CombineTo(ST1, ST1->getChain());
22044 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22046 ST1->getMemoryVT().getFixedSizeInBits())) {
22047 CombineTo(ST1, ST1->getChain());
22059 Value->hasOneUse() && ST->isUnindexed() &&
22061 ST->getMemoryVT(), LegalOperations)) {
22063 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22069 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22078 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22087 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22092 if (isa<ConstantFPSDNode>(ST->getValue())) {
22105 if (!LifetimeEnd->hasOffset())
22108 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
22109 LifetimeEnd->getOffset(), false);
22112 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22120 Chains.push_back(Chain.getOperand(--Nops));
22132 if (!ST->isSimple() || ST->isIndexed())
22134 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22142 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
22147 CombineTo(ST, ST->getChain());
22162 /// (shl (zext I to i64), 32)), addr) -->
22166 /// For pair of {i32, i32}, i64 store --> two i32 stores.
22167 /// For pair of {i32, i16}, i64 store --> two i32 stores.
22168 /// For pair of {i16, i16}, i32 store --> two i16 stores.
22169 /// For pair of {i16, i8}, i32 store --> two i16 stores.
22170 /// For pair of {i8, i8}, i16 store --> two i8 stores.
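/// Worked example (hypothetical IR, little-endian): storing
///   (or (zext i32 %lo to i64), (shl (zext i32 %hi to i64), 32))
/// to %p becomes "store i32 %lo, %p" plus "store i32 %hi, %p + 4",
/// sidestepping the i64 or/shl entirely.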
22190 if (!ST->isSimple())
22193 SDValue Val = ST->getValue();
22217 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
22220 // Lo and Hi are zero-extended from int with size less equal than 32
22242 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22243 AAMDNodes AAInfo = ST->getAAInfo();
22250 SDValue Chain = ST->getChain();
22251 SDValue Ptr = ST->getBasePtr();
22253 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22254 ST->getOriginalAlign(), MMOFlags, AAInfo);
22259 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
22260 ST->getOriginalAlign(), MMOFlags, AAInfo);
22267 // --> (vector_shuffle X, Y, NewMask)
22276 // Vec's operand 0 is using indices from 0 to N-1 and
22277 // operand 1 from N to 2N - 1, where N is the number of
22280 int ElementOffset = -1;
22304 for (SDValue Op : reverse(ArgVal->ops())) {
22305 CurrentArgOffset -= Step;
22317 if (ElementOffset == -1) {
22334 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
22337 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22339 SDValue InsertVal = N->getOperand(1);
22340 SDValue Vec = N->getOperand(0);
22346 ArrayRef<int> Mask = SVN->getMask();
22362 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
22367 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22369 SDValue InsertVal = N->getOperand(1);
22376 SDValue DestVec = N->getOperand(0);
22391 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
22426 EVT VT = N->getValueType(0);
22430 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
22435 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
22436 SDValue Scalar = N->getOperand(1);
22437 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
22439 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
22440 (InsIndex == VT.getVectorNumElements() - 1 &&
22459 SDValue Vec = Shuffle->getOperand(0);
22469 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
22470 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
22471 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22472 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22473 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
22480 -1))
22484 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
22490 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22492 Vec.getValueType(), VecLoad->getAddressSpace(),
22493 NewAlign, VecLoad->getMemOperand()->getFlags(),
22500 SDValue Ptr = ScalarLoad->getBasePtr();
22502 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22505 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22506 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22508 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22509 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22516 SDValue InVec = N->getOperand(0);
22517 SDValue InVal = N->getOperand(1);
22518 SDValue EltNo = N->getOperand(2);
22524 // Insert into out-of-bounds element is undefined.
22526 IndexC->getZExtValue() >= VT.getVectorNumElements())
22530 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22537 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22549 unsigned Elt = IndexC->getZExtValue();
22553 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22563 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22591 // vXi1 vector - we don't need to recurse.
22626 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22630 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22637 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22643 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22646 if (CurIdx->getAPIntValue().ult(NumElts)) {
22647 unsigned Idx = CurIdx->getZExtValue();
22654 CurVec = CurVec->getOperand(0);
22658 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22663 SDValue LHS = SVN->getOperand(0);
22664 SDValue RHS = SVN->getOperand(1);
22665 SmallVector<int, 16> Mask(SVN->getMask());
22685 // TODO: Do this for -1 with OR mask?
22698 // Failed to find a match in the chain - bail.
22726 assert(OriginalLoad->isSimple());
22728 EVT ResultVT = EVE->getValueType(0);
22743 Align Alignment = OriginalLoad->getAlign();
22747 int Elt = ConstEltNo->getZExtValue();
22749 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22754 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22760 OriginalLoad->getAddressSpace(), Alignment,
22761 OriginalLoad->getMemOperand()->getFlags(),
22766 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22778 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22780 OriginalLoad->getMemOperand()->getFlags(),
22781 OriginalLoad->getAAInfo());
22785 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22786 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22787 OriginalLoad->getAAInfo());
22803 SDValue Vec = ExtElt->getOperand(0);
22804 SDValue Index = ExtElt->getOperand(1);
22808 Vec->getNumValues() != 1)
22815 EVT ResVT = ExtElt->getValueType(0);
22820 // Extracting an element of a vector constant is constant-folded, so this
22832 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
22833 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
22839 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
22840 // We may need to sign- or zero-extend the result to match the same
22870 // We perform this optimization post type-legalization because
22871 // the type-legalizer often scalarizes integer-promoted vectors.
22876 // TODO: Add support for big-endian.
22880 SDValue VecOp = N->getOperand(0);
22885 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22889 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22894 EVT ScalarVT = N->getValueType(0);
22898 // TODO: deal with the cases other than everything being integer-typed.
22923 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22935 for (SDNode *User : E.Producer->users()) {
22936 switch (User->getOpcode()) {
22944 /*NumBits=*/User->getValueSizeInBits(0));
22950 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22951 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22952 // Logical right-shift means that we start extraction later,
22954 unsigned ShAmt = ShAmtC->getZExtValue();
22955 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22965 if (User->getOpcode() != ISD::BUILD_VECTOR)
22989 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
23012 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23022 SDValue VecOp = N->getOperand(0);
23023 SDValue Index = N->getOperand(1);
23024 EVT ScalarVT = N->getValueType(0);
23029 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23031 // This only really matters if the index is non-constant since other combines
23041 // (vextract (scalar_to_vector val, 0) -> val
23060 // extract_vector_elt of out-of-bounds element -> UNDEF
23063 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23066 // extract_vector_elt (build_vector x, y), 1 -> y
23074 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23104 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23119 // The vector index of the LSBs of the source depends on the endianness.
23121 unsigned ExtractIndex = IndexC->getZExtValue();
23122 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23123 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
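// E.g. on little-endian, element 0 of (v2i32 (bitcast i64:x)) is the
// low 32 bits of x, i.e. exactly (i32 (trunc x)); only that element is
// a pure truncate -- the others would also need a shift of x first.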
23133 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23142 BCTruncElt = IsLE ? 0 : Scale - 1;
23153 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23164 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23173 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23176 if (OrigElt == -1)
23185 OrigElt -= NumElts;
23212 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23213 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23214 Use->getOperand(0) == VecOp &&
23215 isa<ConstantSDNode>(Use->getOperand(1));
23218 for (SDNode *User : VecOp->users()) {
23219 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
23220 if (CstElt->getAPIntValue().ult(NumElts))
23221 DemandedElts.setBit(CstElt->getZExtValue());
23226 if (N->getOpcode() != ISD::DELETED_NODE)
23234 if (N->getOpcode() != ISD::DELETED_NODE)
23266 // extract (vector load $addr), i --> load $addr + i * size
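// E.g. extracting element 3 of a loaded v4f32 becomes an f32 load from
// $addr + 12 (3 * 4 bytes), assuming the vector load is simple.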
23269 !Index->hasPredecessor(VecOp.getNode())) {
23271 if (VecLoad && VecLoad->isSimple())
23280 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
23281 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
23282 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
23283 int Elt = IndexC->getZExtValue();
23311 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
23323 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
23332 // -> extract_vector_elt a, 0
23334 // -> extract_vector_elt a, 1
23336 // -> extract_vector_elt b, 0
23338 // -> extract_vector_elt b, 1
23351 // Make sure we found a non-volatile load and the extractelement is
23353 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
23356 // If Idx was -1 above, Elt is going to be -1, so just return undef.
23357 if (Elt == -1)
23365 // We perform this optimization post type-legalization because
23366 // the type-legalizer often scalarizes integer-promoted vectors.
23367 // Performing this optimization before may create bit-casts which
23368 // will be type-legalized to complex code sequences.
23374 unsigned NumInScalars = N->getNumOperands();
23376 EVT VT = N->getValueType(0);
23380 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
23381 // optimizations. We do not handle sign-extend because we can't fill the sign
23387 SDValue In = N->getOperand(i);
23446 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23447 SDValue Cast = N->getOperand(i);
23455 In = Cast->getOperand(0);
23457 (i * ElemRatio + (ElemRatio - 1));
23483 // (trunc (srl $1 half-width))
23484 // (trunc (srl $1 (2 * half-width))))
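// E.g. (v4i16 build_vector (trunc i64:x), (trunc (srl x, 16)),
//   (trunc (srl x, 32)), (trunc (srl x, 48))) is just (v4i16 bitcast x)
// on little-endian targets: each element already holds the matching
// 16-bit slice of x.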
23487 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23489 EVT VT = N->getValueType(0);
23508 unsigned NumInScalars = N->getNumOperands();
23520 SDValue In = PeekThroughBitcast(N->getOperand(i));
23572 EVT VT = N->getValueType(0);
23588 "Inputs must be sorted to be in non-increasing vector size order.");
23628 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23676 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23679 // total number of elements in the shuffle - if we are shuffling a wider
23685 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23709 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23713 int NumBVOps = BV->getNumOperands();
23714 int ZextElt = -1;
23716 SDValue Op = BV->getOperand(i);
23719 if (ZextElt == -1)
23724 // Bail out if there's no non-undef element.
23725 if (ZextElt == -1)
23729 // one other element. That other element must be a zero-extended scalar
23734 EVT VT = BV->getValueType(0);
23735 SDValue Zext = BV->getOperand(ZextElt);
23742 // The zero-extend must be a multiple of the source size, and we must be
23755 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23761 // that vector (mask value is number-of-elements) for the high bits.
23762 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23768 // the shuffle mask with -1.
23771 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23789 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23798 EVT VT = N->getValueType(0);
23800 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23812 unsigned NumElems = N->getNumOperands();
23815 // that element comes from. -1 stands for undef, 0 for the zero vector,
23820 SmallVector<int, 8> VectorMask(NumElems, -1);
23828 // Count the number of extract_vector_elt sources (i.e. non-constant or undef)
23832 SDValue Op = N->getOperand(i);
23847 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23858 if (ExtractIdx->getAsAPIntVal().uge(
23867 OneConstExtractIndex = ExtractIdx->getZExtValue();
23874 if (Idx == -1) { // A new source vector?
23915 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23948 // while preserving the relative order of equally-sized vectors.
23949 // Note that we keep the first "implicit" zero vector as-is.
24009 // Update the vector mask to point to the post-shuffle vectors.
24012 Vec = Shuffles.size() - 1;
24014 Vec = (Vec - 1) / 2;
24039 SmallVector<int, 8> Mask(NumElems, -1);
24046 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24055 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24086 EVT VT = N->getValueType(0);
24089 SDValue Op0 = N->getOperand(0);
24090 auto checkElem = [&](SDValue Op) -> int64_t {
24097 return C->getZExtValue();
24098 return -1;
24104 // known-minimum vector length of the result type.
24109 unsigned NumElems = N->getNumOperands();
24120 if ((Offset + i) != checkElem(N->getOperand(i)))
24137 // non-constant-zero op, UNDEF's, and to be KnownBits-based,
24143 // FIXME: support big-endian.
24147 EVT VT = N->getValueType(0);
24148 EVT OpVT = N->getOperand(0).getValueType();
24164 for (auto I : enumerate(N->ops())) {
24169 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24174 // Profitability check: don't allow non-zero constant operands.
24177 // Profitability check: there must only be a single non-zero operand,
24181 // The operand must be a zero-extension itself.
24187 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24194 // This BUILD_VECTOR must have at least one non-constant-zero operand.
24208 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24215 Scale * N->getNumOperands());
24233 for (auto I : enumerate(N->ops())) {
24244 NewOps.append(*Factor - 1, ZeroOp);
24253 EVT VT = N->getValueType(0);
24262 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
24266 // TODO: Maybe this is useful for non-splat too?
24268 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
24275 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
24279 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
24289 if (!LegalTypes && (N->getNumOperands() > 1)) {
24290 SDValue Op0 = N->getOperand(0);
24291 auto checkElem = [&](SDValue Op) -> uint64_t {
24295 return CNode->getZExtValue();
24296 return -1;
24300 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
24301 if (Offset + i != checkElem(N->getOperand(i))) {
24302 Offset = -1;
24308 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
24310 if ((Offset != -1) &&
24311 ((Offset % N->getValueType(0).getVectorNumElements()) ==
24313 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
24335 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
24345 EVT OpVT = N->getOperand(0).getValueType();
24352 EVT VT = N->getValueType(0);
24359 for (const SDValue &Op : N->ops()) {
24399 // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
24402 EVT VT = N->getValueType(0);
24407 for (const SDValue &Op : N->ops()) {
24422 assert(FirstConcat && "Concat of all-undefs found");
24425 for (const SDValue &Op : N->ops()) {
24427 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
24430 ConcatOps.append(Op->op_begin(), Op->op_end());
24440 EVT VT = N->getValueType(0);
24441 EVT OpVT = N->getOperand(0).getValueType();
24453 for (SDValue Op : N->ops()) {
24458 Mask.append((unsigned)NumOpElts, -1);
24476 Mask.append((unsigned)NumOpElts, -1);
24514 unsigned CastOpcode = N->getOperand(0).getOpcode();
24531 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
24538 for (SDValue Op : N->ops()) {
24548 EVT VT = N->getValueType(0);
24550 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24570 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
24582 EVT VT = N->getValueType(0);
24583 EVT OpVT = N->getOperand(0).getValueType();
24587 // For now, only allow simple 2-operand concatenations.
24588 if (N->getNumOperands() != 2)
24603 for (SDValue Op : N->ops()) {
24605 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24606 all_of(N->ops(), [CurSVN](SDValue Op) {
24609 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24621 AdjustedMask.reserve(SVN->getMask().size());
24622 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24623 append_range(AdjustedMask, SVN->getMask());
24639 for (SDValue Op : N->ops()) {
24645 if (Op == SVN->getOperand(0)) {
24649 if (Op == SVN->getOperand(1)) {
24663 for (auto I : zip(SVN->ops(), ShufOps)) {
24669 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24681 if (N->getNumOperands() == 1)
24682 return N->getOperand(0);
24685 EVT VT = N->getValueType(0);
24690 if (all_of(drop_begin(N->ops()),
24692 SDValue In = N->getOperand(0);
24704 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24705 SmallVector<SDValue, 4> Ops(In->ops());
24712 // concat_vectors(scalar_to_vector(scalar), undef) ->
24721 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24725 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24726 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24728 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24729 Scalar = Scalar->getOperand(0);
24756 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24760 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24769 for (const SDValue &Op : N->ops())
24778 for (const SDValue &Op : N->ops()) {
24788 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24832 N->getOperand(0).getValueType().getVectorMinNumElements();
24834 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24835 SDValue Op = N->getOperand(i);
24854 if (SingleSource.getValueType() != N->getValueType(0))
24880 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24881 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24891 SDValue BinOp = Extract->getOperand(0);
24893 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24901 SDValue Index = Extract->getOperand(1);
24902 EVT SubVT = Extract->getValueType(0);
24917 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24919 BinOp->getFlags());
24933 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24940 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24942 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24945 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24947 // with fneg in a target-specific way.
24950 if (C && C->getValueAPF().isNegZero())
24962 EVT VT = Extract->getValueType(0);
24963 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24994 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24995 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25003 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25013 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25014 // flavors, but no other 256-bit integer support. This could be extended to
25022 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25032 // half-sized operand for our new narrow binop:
25033 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25034 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25035 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25055 /// (extract_subvector (load wide vector)) --> (load narrow vector)
25057 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25061 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
25062 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
25065 // Allow targets to opt-out.
25066 EVT VT = Extract->getValueType(0);
25072 unsigned Index = Extract->getConstantOperandVal(1);
25076 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25088 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
25092 // we are extracting from something besides index 0 (little-endian).
25096 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25103 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25104 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
25106 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25109 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25125 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25128 SDValue N0 = N->getOperand(0);
25130 // Only deal with non-scalable vectors.
25131 EVT NarrowVT = N->getValueType(0);
25142 if (!WideShuffleVector->hasOneUse())
25150 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
25163 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
25165 assert((M >= -1) && (M < (2 * WideNumElts)) &&
25166 "Out-of-bounds shuffle mask?");
25182 // And which NumEltsExtracted-sized subvector of that operand is that?
25194 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
25198 NewMask.emplace_back(-1);
25243 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
25269 EVT NVT = N->getValueType(0);
25270 SDValue V = N->getOperand(0);
25271 uint64_t ExtIdx = N->getConstantOperandVal(1);
25283 // ext (ext X, C), 0 --> ext X, C
25293 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
25300 // --> extract_subvector(y,c2-c1)
25309 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
25313 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
25317 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
25380 // extract_subvec (concat V1, V2, ...), i --> Vi
25387 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
25391 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
25427 SDValue Src = V->getOperand(IdxVal);
25435 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
25463 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
25464 N->getOperand(1));
25482 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
25490 ArrayRef<int> Mask = Shuf->getMask();
25491 EVT VT = Shuf->getValueType(0);
25494 SmallVector<int, 16> Mask0(HalfNumElts, -1);
25495 SmallVector<int, 16> Mask1(HalfNumElts, -1);
25497 if (Mask[i] == -1)
25502 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
25506 Mask1[i - HalfNumElts] = M;
25517 // shuffle (concat X, undef), (concat Y, undef), Mask -->
25529 EVT VT = N->getValueType(0);
25532 SDValue N0 = N->getOperand(0);
25533 SDValue N1 = N->getOperand(1);
25535 ArrayRef<int> Mask = SVN->getMask();
25542 auto IsUndefMaskElt = [](int i) { return i == -1; };
25558 // subvector-sized copies from a concatenated vector
25569 int OpIdx = -1;
25585 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25591 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25594 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25604 // We don't fold shuffles where one side is a non-zero constant, and we don't
25605 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25606 // non-constant operands. This seems to work out reasonably well in practice.
25610 EVT VT = SVN->getValueType(0);
25612 SDValue N0 = SVN->getOperand(0);
25613 SDValue N1 = SVN->getOperand(1);
25615 if (!N0->hasOneUse())
25621 if (!N1->hasOneUse())
25638 if (SDValue Splat0 = BV0->getSplatValue())
25639 IsSplat = (Splat0 == BV1->getSplatValue());
25643 for (int M : SVN->getMask()) {
25646 int Idx = M < (int)NumElts ? M : M - NumElts;
25654 // Operand can't be combined - bail out.
25659 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25661 // generate low-quality code if the target can't reconstruct an appropriate
25687 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25696 // TODO Add support for big-endian when we have a test case.
25704 // power-of-2 extensions as they are the most likely.
25727 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25732 EVT VT = SVN->getValueType(0);
25735 // TODO Add support for big-endian when we have a test case.
25739 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25741 Mask = SVN->getMask()](unsigned Scale) {
25753 SDValue N0 = SVN->getOperand(0);
25755 // are pre-legalization.
25765 // e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25771 EVT VT = SVN->getValueType(0);
25776 // TODO: add support for big-endian when we have a test case.
25781 SmallVector<int, 16> Mask(SVN->getMask());
25787 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25801 // Element-wise(!), which of these demanded elements are known to be zero?
25803 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25815 Indice = -2; // Zeroable element.
25827 // The shuffle may be more fine-grained than we want. Widen elements first.
25846 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25847 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25854 // Analyze the shuffle mask in Scale-sized chunks.
25859 // FIXME: undef should be fine, but that results in a more-defined result.
25863 // FIXME: undef should be fine, but that results in a more-defined result.
25865 [](int Indice) { return Indice == -2; }))
25874 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25894 EVT VT = SVN->getValueType(0);
25897 // TODO Add support for big-endian when we have a test case.
25901 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25908 ArrayRef<int> Mask = SVN->getMask();
25918 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
25919 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25920 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25946 // Combine shuffles of splat-shuffles of the form:
25947 // shuffle (shuffle V, undef, splat-mask), undef, M
25948 // If splat-mask contains undef elements, we need to be careful about
25953 EVT VT = Shuf->getValueType(0);
25956 if (!Shuf->getOperand(1).isUndef())
25959 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25961 // FIXME: this can be done per-operand.
25962 if (!Shuf->isSplat()) {
25964 for (int Idx : Shuf->getMask()) {
25967 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
25972 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25974 // Which lowest demanded element is *not* known-undef?
25976 for (int Idx : Shuf->getMask()) {
25982 return DAG.getUNDEF(VT); // All undef - result is undef.
25984 SmallVector<int, 8> SplatMask(Shuf->getMask());
25988 // Otherwise, just pick the lowest demanded non-undef element.
25989 // Or sentinel undef, if we know we'd pick a known-undef element.
25990 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25992 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25993 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25994 Shuf->getOperand(1), SplatMask);
26001 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26002 return Shuf->getOperand(0);
26004 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26005 if (!Splat || !Splat->isSplat())
26008 ArrayRef<int> ShufMask = Shuf->getMask();
26009 ArrayRef<int> SplatMask = Splat->getMask();
26012 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26013 // every undef mask element in the splat-shuffle has a corresponding undef
26014 // element in the user-shuffle's mask or if the composition of mask elements
26017 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26018 // In this case it is not legal to simplify to the splat-shuffle because we
26021 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26023 // simplify to the splat-shuffle.
26024 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26027 // the splat-shuffle.
26031 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26032 SplatMask[UserMask[i]] != -1)
26037 return Shuf->getOperand(0);
26043 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26045 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26046 Splat->getOperand(0), Splat->getOperand(1),
26056 SDValue Op0 = SVN->getOperand(0);
26057 SDValue Op1 = SVN->getOperand(1);
26058 EVT VT = SVN->getValueType(0);
26080 ArrayRef<int> Mask = SVN->getMask();
26098 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
26101 if (!OuterShuf->getOperand(1).isUndef())
26103 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26104 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26107 ArrayRef<int> OuterMask = OuterShuf->getMask();
26108 ArrayRef<int> InnerMask = InnerShuf->getMask();
26111 SmallVector<int, 32> CombinedMask(NumElts, -1);
26112 int SplatIndex = -1;
26116 if (OuterMaskElt == -1)
26121 if (InnerMaskElt == -1)
26125 if (SplatIndex == -1)
26128 // Non-matching index - this is not a splat.
26134 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26135 getSplatIndex(CombinedMask) != -1) &&
26139 EVT VT = OuterShuf->getValueType(0);
26140 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26144 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26145 InnerShuf->getOperand(1), CombinedMask);
26151 /// from the first operand. Otherwise, return -1.
26154 int EltFromOp0 = -1;
26156 // Should we ignore undefs in the shuffle mask instead? The trade-off is
26162 if (EltFromOp0 != -1)
26163 return -1;
26167 return -1;
26180 ArrayRef<int> Mask = Shuf->getMask();
26182 SDValue Op0 = Shuf->getOperand(0);
26183 SDValue Op1 = Shuf->getOperand(1);
26185 if (ShufOp0Index == -1) {
26189 if (ShufOp0Index == -1)
26211 // this to a scalar-to-vector plus shuffle.
26217 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
26232 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26233 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
26236 ArrayRef<int> Mask = Shuf->getMask();
26237 ArrayRef<int> Mask0 = Shuf0->getMask();
26240 if (Mask[i] == -1)
26251 return Shuf->getOperand(0);
26255 EVT VT = N->getValueType(0);
26258 SDValue N0 = N->getOperand(0);
26259 SDValue N1 = N->getOperand(1);
26263 // Canonicalize shuffle undef, undef -> undef
26269 // Canonicalize shuffle v, v -> v, undef
26272 createUnaryMask(SVN->getMask(), NumElts));
26274 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
26283 int Idx = SVN->getMaskElt(i);
26285 Idx = -1;
26306 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
26307 int SplatIndex = SVN->getSplatIndex();
26309 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
26310 // splat (vector_bo L, R), Index -->
26319 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
26325 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
26326 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
26334 if (Idx->getAPIntValue() == SplatIndex)
26359 if (V->getOpcode() == ISD::BITCAST) {
26360 SDValue ConvInput = V->getOperand(0);
26366 if (V->getOpcode() == ISD::BUILD_VECTOR) {
26367 assert(V->getNumOperands() == NumElts &&
26372 if (!V->getOperand(i).isUndef()) {
26373 Base = V->getOperand(i);
26381 if (V->getOperand(i) != Base) {
26392 SDValue Splatted = V->getOperand(SplatIndex);
26397 if (SVN->getMaskElt(i) < 0)
26401 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
26405 if (V->getValueType(0) != VT)
26439 // only low-half elements of a concat with undef:
26440 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
26447 int Idx = SVN->getMaskElt(i);
26450 Idx -= HalfNumElts;
26465 // --> insert_subvector(lhs,rhs1,4).
26514 ArrayRef<int> Mask = SVN->getMask();
26527 // shuffle into an AND node, where all the out-of-lane elements are known zero.
26530 ArrayRef<int> Mask = SVN->getMask();
26531 SmallVector<int, 16> ClearMask(NumElts, -1);
26554 // original type, in case the value is split into two (e.g. i64->i32).
26580 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26596 EVT InnerVT = BC0->getValueType(0);
26613 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26614 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26619 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26622 SDValue SV0 = BC0->getOperand(0);
26623 SDValue SV1 = BC0->getOperand(1);
26648 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26649 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26654 SmallVectorImpl<int> &Mask) -> bool {
26657 if (OtherSVN->isSplat())
26664 int Idx = SVN->getMaskElt(i);
26672 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26678 Idx = OtherSVN->getMaskElt(Idx);
26684 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26685 : OtherSVN->getOperand(1);
26693 Mask.push_back(-1);
26715 // Last chance - see if the vector is another shuffle and if it
26718 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26720 Mask.push_back(-1);
26724 ? CurrentSVN->getOperand(0)
26725 : CurrentSVN->getOperand(1);
26727 Mask.push_back(-1);
26749 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26750 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26751 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26752 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26753 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26754 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26765 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26766 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26767 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26772 assert(N1->getOperand(0).getValueType() == VT &&
26775 SDValue SV0 = N1->getOperand(0);
26776 SDValue SV1 = N1->getOperand(1);
26785 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26788 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26789 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26794 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26795 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26796 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26801 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26802 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26805 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26806 assert(OtherSV->getOperand(0).getValueType() == VT &&
26811 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26829 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26831 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26857 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26860 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26864 // Ensure we don't increase the number of shuffles - we must merge a
26873 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26884 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26917 EVT VT = N->getValueType(0);
26926 SDValue Scalar = N->getOperand(0);
26929 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26933 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26934 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26937 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26940 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26941 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
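// E.g. scalar_to_vector (add (extractelt v4i32 V, 2), 7) can become
// shuffle (add V, splat(7)), undef, {2,-1,-1,-1}: lane 0 of the shuffle
// is (V[2] + 7), and the remaining lanes are undef anyway.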
26953 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26954 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26984 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26985 SmallVector<int, 8> Mask(SrcNumElts, -1);
26986 Mask[0] = ExtIndexC->getZExtValue();
27010 EVT VT = N->getValueType(0);
27011 SDValue N0 = N->getOperand(0);
27012 SDValue N1 = N->getOperand(1);
27013 SDValue N2 = N->getOperand(2);
27014 uint64_t InsIdx = N->getConstantOperandVal(2);
27044 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27050 // insert_subvector undef, (splat X), N2 -> splat X
27057 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27071 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27089 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27097 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27107 // -> bitcast(insert_subvector(v, s, c2))
27145 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
27166 SmallVector<SDValue, 8> Ops(N0->ops());
27179 SDValue N0 = N->getOperand(0);
27181 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
27182 if (N0->getOpcode() == ISD::FP16_TO_FP)
27183 return N0->getOperand(0);
27189 auto Op = N->getOpcode();
27192 SDValue N0 = N->getOperand(0);
27194 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
27195 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27196 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
27198 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
27199 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
27206 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
27207 N->getValueType(0), {N0});
27212 SDValue N0 = N->getOperand(0);
27214 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
27215 if (N0->getOpcode() == ISD::BF16_TO_FP)
27216 return N0->getOperand(0);
27222 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27227 SDValue N0 = N->getOperand(0);
27229 unsigned Opcode = N->getOpcode();
27231 // VECREDUCE over 1-element vector is just an extract.
27237 if (Res.getValueType() != N->getValueType(0))
27238 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
27250 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
27253 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
27254 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
27263 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
27266 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
27277 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
27285 // FSUB -> FMA combines:
27295 if (N->getOpcode() == ISD::VP_GATHER)
27299 if (N->getOpcode() == ISD::VP_SCATTER)
27303 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
27307 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
27311 // VP operations in which all vector elements are disabled - either by
27312 // determining that the mask is all false or that the EVL is 0 - can be
27315 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
27316 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
27317 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
27319 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
27323 switch (N->getOpcode()) {
27343 if (ISD::isVPBinaryOp(N->getOpcode()))
27344 return DAG.getUNDEF(N->getValueType(0));
27349 if (MemSD->writeMem())
27350 return MemSD->getChain();
27351 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
27355 if (ISD::isVPReduction(N->getOpcode()))
27356 return N->getOperand(0);
27362 SDValue Chain = N->getOperand(0);
27363 SDValue Ptr = N->getOperand(1);
27364 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27369 for (auto *U : Ptr->users()) {
27380 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27381 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27382 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
27387 for (SDUse &U : LdNode->uses()) {
27398 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27399 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27400 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27405 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
27406 StNode->getMemOperand());
27412 SDValue Chain = N->getOperand(0);
27413 SDValue Ptr = N->getOperand(1);
27414 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27418 for (auto *U : Ptr->users()) {
27429 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27430 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27436 SDValue StValue = StNode->getValue();
27438 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27439 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27440 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27446 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
27447 LdNode->getMemOperand());
27456 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
27458 EVT VT = N->getValueType(0);
27459 SDValue LHS = N->getOperand(0);
27460 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
27476 // all zeros or all ones - suitable for shuffle masking.
27486 // X & undef --> 0 (not undef). So this lane must be converted to choose
27487 // from the zero constant vector (same as if the element had all 0-bits).
27495 Bits = Cst->getAPIntValue();
27497 Bits = CstFP->getValueAPF().bitcastToAPInt();
27503 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
27544 SDValue N0 = N->getOperand(0);
27545 SDValue N1 = N->getOperand(1);
27546 unsigned Opcode = N->getOpcode();
27547 EVT VT = N->getValueType(0);
27579 // constant or undef. Avoid splatting which would over-define potentially
27582 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27589 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
27596 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27598 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27604 EVT VT = N->getValueType(0);
27607 unsigned Opcode = N->getOpcode();
27609 SDValue N0 = N->getOperand(0);
27625 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27637 EVT VT = N->getValueType(0);
27640 SDValue LHS = N->getOperand(0);
27641 SDValue RHS = N->getOperand(1);
27642 unsigned Opcode = N->getOpcode();
27643 SDNodeFlags Flags = N->getFlags();
27647 // --> shuffle (VBinOp A, B), Undef, Mask
27650 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27655 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27661 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27666 // undefined elements because that could be poison-unsafe or inhibit
27669 // load-folding or other target-specific behaviors.
27670 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27671 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27672 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27673 // binop (splat X), (splat C) --> splat (binop X, C)
27674 SDValue X = Shuf0->getOperand(0);
27677 Shuf0->getMask());
27679 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27680 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27681 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27682 // binop (splat C), (splat X) --> splat (binop C, X)
27683 SDValue X = Shuf1->getOperand(0);
27686 Shuf1->getMask());
27693 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27716 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27725 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27756 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27765 const SDNodeFlags Flags = N0->getFlags();
27773 SelectNode->setFlags(Flags);
27785 /// should return the appropriate thing (e.g. the node) back to the top-level of
27789 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27792 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27799 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27800 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27801 CmpLHS = TheSelect->getOperand(0);
27802 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27805 SDValue Cmp = TheSelect->getOperand(0);
27807 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27812 if (Zero && Zero->isZero() &&
27815 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27822 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27843 !LLD->isSimple() || !RLD->isSimple() ||
27846 LLD->isIndexed() || RLD->isIndexed() ||
27848 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27850 (LLD->getExtensionType() != RLD->getExtensionType() &&
27852 LLD->getExtensionType() != ISD::EXTLOAD &&
27853 RLD->getExtensionType() != ISD::EXTLOAD) ||
27855 // over-conservative. It would be beneficial to be able to remember
27859 LLD->getPointerInfo().getAddrSpace() != 0 ||
27860 RLD->getPointerInfo().getAddrSpace() != 0 ||
27863 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27864 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27865 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27866 LLD->getBasePtr().getValueType()))
27870 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27892 if (TheSelect->getOpcode() == ISD::SELECT) {
27898 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27901 if ((LLD->hasAnyUseOfValue(1) &&
27903 (RLD->hasAnyUseOfValue(1) &&
27908 LLD->getBasePtr().getValueType(),
27909 TheSelect->getOperand(0), LLD->getBasePtr(),
27910 RLD->getBasePtr());
27918 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27919 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27923 if ((LLD->hasAnyUseOfValue(1) &&
27925 (RLD->hasAnyUseOfValue(1) &&
27930 LLD->getBasePtr().getValueType(),
27931 TheSelect->getOperand(0),
27932 TheSelect->getOperand(1),
27933 LLD->getBasePtr(), RLD->getBasePtr(),
27934 TheSelect->getOperand(4));
27941 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27942 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27943 if (!RLD->isInvariant())
27945 if (!RLD->isDereferenceable())
27947 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27949 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27950 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27955 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27956 : LLD->getExtensionType(),
27957 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27958 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27965 // old-load value is dead now.
27981 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27982 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
27992 // (X > -1) ? A : 0
27993 // (X > 0) ? X : 0 <-- This is canonical signed max.
27998 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
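// Worked example (hypothetical i32 values): for X = -5, (sra X, 31) is
// all-ones, so (and (sra X, 31), A) == A; for X = 7 the shift is 0 and
// the result is 0 -- exactly "X < 0 ? A : 0".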
28005 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28008 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28009 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28027 unsigned ShCt = XType.getSizeInBits() - 1;
28046 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28048 SDValue N0 = N->getOperand(0);
28049 SDValue N1 = N->getOperand(1);
28050 SDValue N2 = N->getOperand(2);
28062 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28067 SDVTList OpVTs = N1->getVTList();
28070 // --> binop(select(cond, x, z), y)
28076 NewBinOp->setFlags(N1->getFlags());
28077 NewBinOp->intersectFlagsWith(N2->getFlags());
28082 // --> binop(x, select(cond, y, z))
28091 NewBinOp->setFlags(N1->getFlags());
28092 NewBinOp->intersectFlagsWith(N2->getFlags());
28103 SDValue N0 = N->getOperand(0);
28104 EVT VT = N->getValueType(0);
28105 bool IsFabs = N->getOpcode() == ISD::FABS;
28118 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28119 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
28161 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
28162 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
28167 if (!TV->hasOneUse() && !FV->hasOneUse())
28170 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
28171 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
28172 Type *FPTy = Elts[0]->getType();
28179 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
28184 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
28193 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
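// [Editor's note] FV is deliberately element 0 and TV element 1: the selected
// byte offset is 0 or EltSize, so the load reads pool[Cond ? 1 : 0], i.e.
// Cond ? TV : FV, turning the whole select_cc into a single constant-pool
// load with a computed address.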
28203 // (x ? y : y) -> y.
28217 // fold select_cc true, x, y -> x
28218 // fold select_cc false, x, y -> y
28219 return !SCCC->isZero() ? N2 : N3;
28230 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
28233 // when the condition can be materialized as an all-ones register. Any
28234 // single bit-test can be materialized as an all-ones register with
28235 // shift-left and shift-right-arith.
28236 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
28237 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
28238 SDValue AndLHS = N0->getOperand(0);
28239 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
28240 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
28242 const APInt &AndMask = ConstAndRHS->getAPIntValue();
28244 unsigned ShCt = AndMask.getBitWidth() - 1;
28250 // Now arithmetic-right-shift the tested bit across the word, so the result is either all-ones or zero.
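// [Editor's sketch; standalone model, not DAGCombiner code] The shl/sra smear
// on 32-bit scalars, K in [0, 31], relying on C++20 signed-shift semantics:
#include <cstdint>
static int32_t selectCCBitTestModel(int32_t X, unsigned K, int32_t A) {
  // select_cc seteq (and X, 1<<K), 0, 0, A: move bit K up to the sign bit,
  // then arithmetic-shift it back across the word. The smear is all-ones iff
  // bit K was set, so the result is A exactly in that case and 0 otherwise.
  int32_t Smeared = (int32_t)((uint32_t)X << (31 - K)) >> 31;
  return Smeared & A;
}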
28259 // fold select C, 16, 0 -> shl C, 4
28260 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
28261 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
28275 if (NotExtCompare && N2C->isOne())
28291 if (N2C->isOne())
28294 unsigned ShCt = N2C->getAPIntValue().logBase2();
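// [Editor's example] With N2 == 16 and N3 == 0, the shift count is
// log2(16) == 4, so: select C, 16, 0 --> shl (zext (setcc ...)), 4. The Swap
// case handles select C, 0, 16 by inverting the condition code first.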
28304 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
28305 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
28306 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
28307 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
28308 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
28309 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
28310 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
28311 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
28312 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
28320 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
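// [Editor's note] These folds are sound because ISD::CTLZ and ISD::CTTZ are
// defined to return the operand's bit width on a zero input, so the select
// merely restates that case; e.g. on i32, select_cc seteq X, 0, 32, ctlz(X)
// is ctlz(X) verbatim. The _ZERO_UNDEF variants become fully defined once the
// X == 0 case is covered, which is why they are rewritten to the plain
// opcodes.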
28339 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
28340 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
28342 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
28343 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
28344 (N1C->isZero() && CC == ISD::SETLT)) &&
28345 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
28348 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
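// [Editor's sketch; standalone model, not DAGCombiner code] The setgt form on
// 32-bit scalars; (X >> 31) is 0 when X > -1 and all-ones when X < 0:
#include <cstdint>
static int32_t selectCCSignXorModel(int32_t X, int32_t C) {
  // select_cc setgt X, -1, C, ~C == xor (ashr X, 31), C. The setlt-0 form is
  // identical except that ~C is used as the xor constant.
  return (X >> 31) ^ C;
}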
28395 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28400 if (C->isZero())
28404 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
28436 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28441 if (C->isZero())
28445 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
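// [Editor's sketch; a generic expansion, not the target hook itself] What a
// typical BuildSDIVPow2 lowering computes for X / (1 << K), K in [1, 31], on
// 32-bit scalars; BuildSREMPow2 can reuse it as X - ((X / 2^K) << K):
#include <cstdint>
static int32_t sdivPow2Model(int32_t X, unsigned K) {
  int32_t SignMask = X >> 31;                     // all-ones iff X < 0
  uint32_t Bias = (uint32_t)SignMask >> (32 - K); // 2^K - 1 iff X < 0, else 0
  return (int32_t)((uint32_t)X + Bias) >> K;      // truncating signed divide
}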
28461 // This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
28463 // `AssumeNonZero` when the caller can otherwise guarantee that `Op` is non-zero.
28465 SDValue Op, unsigned Depth,
28487 // Helper for determining whether a value is a power-of-2 constant scalar or a vector of such constants.
28491 if (C->isZero() || C->isOpaque())
28494 if (C->getAPIntValue().isPowerOf2()) {
28495 Pow2Constants.emplace_back(C->getAPIntValue());
28516 if (Depth >= DAG.MaxRecursionDepth)
28531 // log2(X << Y) -> log2(X) + Y
28533 // These forms are known non-zero: 1 << Y, and X << Y with nuw/nsw (a non-zero X cannot wrap to zero).
28534 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
28535 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
28537 Depth + 1, AssumeNonZero))
28542 // c ? X : Y -> c ? Log2(X) : Log2(Y)
28546 Depth + 1, AssumeNonZero))
28548 Depth + 1, AssumeNonZero))
28552 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
28553 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
28559 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
28562 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
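// [Editor's examples] The recursive cases above compose; assuming the operands
// are known (or assumed) non-zero:
//   Log2(1 << Y)            --> Y
//   Log2(C ? 16 : (1 << Y)) --> C ? 4 : Y
//   Log2(umin(X, 16))       --> umin(Log2(X), 4)   (log2 is monotonic)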
28570 /// Determines the LogBase2 value for a non-null input value using the
28571 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
28577 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
28582 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
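// [Editor's sketch; standalone check of the identity above] For a non-zero
// 32-bit power of two V, (31 - ctlz(V)) is its exponent: ctlz(16) == 27 and
// 31 - 27 == 4 == log2(16). A C++20 <bit> model:
#include <bit>
#include <cstdint>
static unsigned logBase2Model(uint32_t V) { // precondition: V != 0
  return 31u - (unsigned)std::countl_zero(V);
}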
28587 /// Newton's method for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
28589 /// F(X) = 1/X - A [which has a zero at X = 1/A]
28591 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form does not require additional intermediate precision]
28594 /// Result = N X_i + X_i (N - N A X_i)
28622 // Newton iterations: Est = Est + Est (N - Arg * Est)
28627 if (i == Iterations - 1) {
28636 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
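// [Editor's model; plain-float version of the recurrence, not DAG code] Each
// step roughly squares the relative error, so a hardware estimate with ~12
// good bits reaches full float precision in one or two iterations:
static float refineRecipModel(float A, float Est, unsigned Iters) {
  for (unsigned i = 0; i != Iters; ++i)
    Est = Est + Est * (1.0f - A * Est); // X_{i+1} = X_i + X_i (1 - A X_i)
  return Est; // ~ 1/A; the DAG version folds the numerator N into the last step
}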
28657 /// Newton's method for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
28659 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28661 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28670 // We now need 0.5 * Arg, which we can write as (1.5 * Arg - Arg) so that the whole sequence requires only one FP constant.
28675 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28683 // If non-reciprocal square root is requested, multiply the result by Arg.
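// [Editor's model; plain-float version, not DAG code] Note how 0.5 * A is
// formed as (1.5 * A - A), mirroring the HalfArg trick above so only the 1.5
// constant has to be materialized:
static float refineRsqrtModel(float A, float Est, unsigned Iters) {
  float HalfArg = 1.5f * A - A; // == 0.5f * A
  for (unsigned i = 0; i != Iters; ++i)
    Est = Est * (1.5f - HalfArg * Est * Est); // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
  return Est; // ~ 1/sqrt(A); multiply by A afterwards to get sqrt(A)
}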
28690 /// Newton's method for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
28692 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28694 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
28700 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28701 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28708 // E = (E * -0.5) * ((A * E) * E + -3.0)
28715 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28719 // RSQRT: LHS = (E * -0.5)
28722 // SQRT: LHS = (A * E) * -0.5
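// [Editor's derivation] This variant is the same Newton step in disguise:
//   (-0.5 X) * (A X^2 - 3.0) == X (1.5 - 0.5 A X^2)
// but factored so the subexpression (A * E) is shared between the RSQRT and
// SQRT forms of LHS shown above, saving a multiply when sqrt(A) is wanted.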
28804 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28807 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28808 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28809 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28810 ? -1 * C->getSExtValue()
28812 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28813 return {LSN->isVolatile(), LSN->isAtomic(),
28814 LSN->getBasePtr(), Offset /*base offset*/,
28815 LocationSize::precise(Size), LSN->getMemOperand()};
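// [Editor's note] Only pre-indexed forms fold the offset into the address used
// by the access itself, hence PRE_INC adds and PRE_DEC subtracts the constant;
// post-indexed forms update the pointer after the access, so their base
// offset here stays 0.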
28820 LN->getOperand(1),
28821 (LN->hasOffset()) ? LN->getOffset() : 0,
28822 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28852 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28853 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28879 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28880 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28888 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28889 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28890 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28891 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28922 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28930 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28932 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28937 if (BatchAA->isNoAlias(
28938 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28939 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28940 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28941 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
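// [Editor's worked example] For two 4-byte accesses at offsets 4 and 8 from
// the same IR value: MinOffset == 4, Overlap0 == 4 + 4 - 4 == 4 and
// Overlap1 == 4 + 8 - 4 == 8, so the sizes handed to BatchAA are extended to
// cover both accessed ranges measured from the common minimum offset.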
28949 /// Walk up chain skipping non-aliasing memory nodes, looking for aliasing nodes and adding them to the Aliases vector.
28958 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28962 unsigned Depth = 0;
28965 auto ImproveChain = [&](SDValue &C) -> bool {
28976 cast<LSBaseSDNode>(C.getNode())->isSimple();
29018 // For TokenFactor nodes, look at each operand and only continue up the chain until we reach the depth limit.
29020 // FIXME: The depth check could be made to return the last non-aliasing chain found before hitting a TokenFactor, rather than the original chain.
29023 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
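// [Editor's note] Hitting the limit abandons the walk conservatively, keeping
// the original chain rather than a partially-improved one; that is exactly
// the behavior the FIXME above would like to relax.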
29039 Chains.push_back(Chain.getOperand(--n));
29040 ++Depth;
29048 ++Depth;
29056 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
29087 // We improve the chains of all the potential candidates up-front, so the
29090 // merge can go from a partially-merged state to the desired final
29091 // fully-merged state.
29118 if (St->getMemoryVT().isZeroSized())
29121 // BaseIndexOffset assumes that offsets are fixed-size, which does not hold for scalable vectors, so bail out early.
29124 if (St->getMemoryVT().isScalableVT())
29128 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
29131 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
29132 if (Chain->getMemoryVT().isScalableVector())
29136 if (!SDValue(Chain, 0)->hasOneUse())
29139 if (!Chain->isSimple() || Chain->isIndexed())
29148 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
29156 if (I != Intervals.begin() && (--I).stop() <= Offset)
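// [Editor's note] Intervals records the byte ranges [Offset, Offset + Length)
// of stores already accepted for parallelization; the checks around
// find(Offset) ensure a new candidate overlaps neither the interval after it
// nor the one before it before it is inserted.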
29170 SDValue NewChain = STChain->getChain();
29173 StoreSDNode *S = ChainedStores[--I];
29176 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
29184 if (St->isTruncatingStore())
29185 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
29186 St->getBasePtr(), St->getMemoryVT(),
29187 St->getMemOperand());
29189 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
29190 St->getBasePtr(), St->getMemOperand());
29197 auto hasImprovedChain = [&](SDValue ST) -> bool {
29198 return ST->getOperand(0) != NewChain;
29209 for (const SDValue &Op : TF->ops())
29234 SDValue BetterChain = FindBetterChain(St, St->getChain());
29235 if (St->getChain() != BetterChain) {