xref: /llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 778138114e9e42e28fcb51c0a38224e667a3790c)
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallBitVector.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/Analysis/AliasAnalysis.h"
31 #include "llvm/Analysis/MemoryLocation.h"
32 #include "llvm/Analysis/TargetLibraryInfo.h"
33 #include "llvm/Analysis/ValueTracking.h"
34 #include "llvm/Analysis/VectorUtils.h"
35 #include "llvm/CodeGen/ByteProvider.h"
36 #include "llvm/CodeGen/DAGCombine.h"
37 #include "llvm/CodeGen/ISDOpcodes.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineMemOperand.h"
40 #include "llvm/CodeGen/SDPatternMatch.h"
41 #include "llvm/CodeGen/SelectionDAG.h"
42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43 #include "llvm/CodeGen/SelectionDAGNodes.h"
44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45 #include "llvm/CodeGen/TargetLowering.h"
46 #include "llvm/CodeGen/TargetRegisterInfo.h"
47 #include "llvm/CodeGen/TargetSubtargetInfo.h"
48 #include "llvm/CodeGen/ValueTypes.h"
49 #include "llvm/CodeGenTypes/MachineValueType.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/Constant.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/DebugCounter.h"
62 #include "llvm/Support/ErrorHandling.h"
63 #include "llvm/Support/KnownBits.h"
64 #include "llvm/Support/MathExtras.h"
65 #include "llvm/Support/raw_ostream.h"
66 #include "llvm/Target/TargetMachine.h"
67 #include "llvm/Target/TargetOptions.h"
68 #include <algorithm>
69 #include <cassert>
70 #include <cstdint>
71 #include <functional>
72 #include <iterator>
73 #include <optional>
74 #include <string>
75 #include <tuple>
76 #include <utility>
77 #include <variant>
78 
79 #include "MatchContext.h"
80 
81 using namespace llvm;
82 using namespace llvm::SDPatternMatch;
83 
84 #define DEBUG_TYPE "dagcombine"
85 
86 STATISTIC(NodesCombined   , "Number of dag nodes combined");
87 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
88 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
89 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
90 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
91 STATISTIC(SlicedLoads, "Number of loads sliced");
92 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
93 
94 DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
95               "Controls whether a DAG combine is performed for a node");
96 
97 static cl::opt<bool>
98 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
99                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
100 
101 static cl::opt<bool>
102 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
103         cl::desc("Enable DAG combiner's use of TBAA"));
104 
105 #ifndef NDEBUG
106 static cl::opt<std::string>
107 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
108                    cl::desc("Only use DAG-combiner alias analysis in this"
109                             " function"));
110 #endif
111 
112 /// Hidden option to stress test load slicing, i.e., when this option
113 /// is enabled, load slicing bypasses most of its profitability guards.
114 static cl::opt<bool>
115 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
116                   cl::desc("Bypass the profitability model of load slicing"),
117                   cl::init(false));
118 
119 static cl::opt<bool>
120   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
121                     cl::desc("DAG combiner may split indexing from loads"));
122 
123 static cl::opt<bool>
124     EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
125                        cl::desc("DAG combiner enable merging multiple stores "
126                                 "into a wider store"));
127 
128 static cl::opt<unsigned> TokenFactorInlineLimit(
129     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
130     cl::desc("Limit the number of operands to inline for Token Factors"));
131 
132 static cl::opt<unsigned> StoreMergeDependenceLimit(
133     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
134     cl::desc("Limit the number of times the same StoreNode and RootNode pair "
135              "may bail out of the store merging dependence check"));
136 
137 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
138     "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
139     cl::desc("DAG combiner enable reducing the width of load/op/store "
140              "sequence"));
141 static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable(
142     "combiner-reduce-load-op-store-width-force-narrowing-profitable",
143     cl::Hidden, cl::init(false),
144     cl::desc("Force the DAG combiner to override the narrowing-profitability "
145              "check when reducing the width of load/op/store sequences"));
146 
147 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
148     "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
149     cl::desc("DAG combiner enable load/<replace bytes>/store with "
150              "a narrower store"));
151 
152 static cl::opt<bool> EnableVectorFCopySignExtendRound(
153     "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
154     cl::desc(
155         "Enable merging extends and rounds into FCOPYSIGN on vector types"));
156 namespace {
157 
158   class DAGCombiner {
159     SelectionDAG &DAG;
160     const TargetLowering &TLI;
161     const SelectionDAGTargetInfo *STI;
162     CombineLevel Level = BeforeLegalizeTypes;
163     CodeGenOptLevel OptLevel;
164     bool LegalDAG = false;
165     bool LegalOperations = false;
166     bool LegalTypes = false;
167     bool ForCodeSize;
168     bool DisableGenericCombines;
169 
170     /// Worklist of all of the nodes that need to be simplified.
171     ///
172     /// This must behave as a stack -- new nodes to process are pushed onto the
173     /// back and when processing we pop off of the back.
174     ///
175     /// The worklist will not contain duplicates but may contain null entries
176     /// due to nodes being deleted from the underlying DAG. For fast lookup and
177     /// deduplication, the index of the node in this vector is stored in the
178     /// node in SDNode::CombinerWorklistIndex.
179     SmallVector<SDNode *, 64> Worklist;
180 
181     /// This records all nodes attempted to be added to the worklist since we
182     /// considered a new worklist entry. Since we do not add duplicate nodes
183     /// to the worklist, this is different from the tail of the worklist.
184     SmallSetVector<SDNode *, 32> PruningList;
185 
186     /// Map from candidate StoreNode to the pair of RootNode and count.
187     /// The count is used to track how many times we have seen the StoreNode
188     /// with the same RootNode bail out in dependence check. If we have seen
189     /// the bail out for the same pair many times over a limit, we won't
190     /// consider the StoreNode with the same RootNode as store merging
191     /// candidate again.
192     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
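    // A hedged sketch (not the verbatim combiner code) of how this map is
    // meant to interact with -combiner-store-merge-dependence-limit when a
    // dependence check bails out:
    //   auto &Entry = StoreRootCountMap[StoreNode];   // hypothetical use
    //   if (Entry.first == RootNode &&
    //       ++Entry.second >= StoreMergeDependenceLimit)
    //     ; // stop considering this StoreNode/RootNode pair for merging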
193 
194     // BatchAA - Used for DAG load/store alias analysis.
195     BatchAAResults *BatchAA;
196 
197     /// This caches all chains that have already been processed in
198     /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
199     /// stores candidates.
200     SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
201 
202     /// When an instruction is simplified, add all users of the instruction to
203     /// the work lists because they might get more simplified now.
204     void AddUsersToWorklist(SDNode *N) {
205       for (SDNode *Node : N->users())
206         AddToWorklist(Node);
207     }
208 
209     /// Convenient shorthand to add a node and all of its users to the worklist.
210     void AddToWorklistWithUsers(SDNode *N) {
211       AddUsersToWorklist(N);
212       AddToWorklist(N);
213     }
214 
215     // Prune potentially dangling nodes. This is called after
216     // any visit to a node, but should also be called during a visit after any
217     // failed combine which may have created a DAG node.
218     void clearAddedDanglingWorklistEntries() {
219       // Check any nodes added to the worklist to see if they are prunable.
220       while (!PruningList.empty()) {
221         auto *N = PruningList.pop_back_val();
222         if (N->use_empty())
223           recursivelyDeleteUnusedNodes(N);
224       }
225     }
226 
227     SDNode *getNextWorklistEntry() {
228       // Before we do any work, remove nodes that are not in use.
229       clearAddedDanglingWorklistEntries();
230       SDNode *N = nullptr;
231       // The Worklist holds the SDNodes in order, but it may contain null
232       // entries.
233       while (!N && !Worklist.empty()) {
234         N = Worklist.pop_back_val();
235       }
236 
237       if (N) {
238         assert(N->getCombinerWorklistIndex() >= 0 &&
239                "Found a worklist entry without a corresponding map entry!");
240         // Set to -2 to indicate that we combined the node.
241         N->setCombinerWorklistIndex(-2);
242       }
243       return N;
244     }
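    // For reference, the SDNode::CombinerWorklistIndex values used above and
    // in AddToWorklist/removeFromWorklist below are:
    //   >= 0 : index of the node's (possibly nulled-out) slot in Worklist
    //   -1   : not currently in the worklist
    //   -2   : popped and combined at least once already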
245 
246     /// Call the node-specific routine that folds each particular type of node.
247     SDValue visit(SDNode *N);
248 
249   public:
250     DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
251         : DAG(D), TLI(D.getTargetLoweringInfo()),
252           STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
253           BatchAA(BatchAA) {
254       ForCodeSize = DAG.shouldOptForSize();
255       DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
256 
257       MaximumLegalStoreInBits = 0;
258       // We use the minimum store size here, since that's all we can guarantee
259       // for the scalable vector types.
260       for (MVT VT : MVT::all_valuetypes())
261         if (EVT(VT).isSimple() && VT != MVT::Other &&
262             TLI.isTypeLegal(EVT(VT)) &&
263             VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
264           MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
265     }
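    // Worked example (hypothetical target): if the widest legal fixed-width
    // type is v4i64, the loop above leaves MaximumLegalStoreInBits == 256.
    // A scalable type such as nxv2i64 contributes only its known minimum of
    // 128 bits, which is all that can be guaranteed at compile time.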
266 
267     void ConsiderForPruning(SDNode *N) {
268       // Mark this for potential pruning.
269       PruningList.insert(N);
270     }
271 
272     /// Add to the worklist, making sure its instance is at the back (next to
273     /// be processed).
274     void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
275                        bool SkipIfCombinedBefore = false) {
276       assert(N->getOpcode() != ISD::DELETED_NODE &&
277              "Deleted Node added to Worklist");
278 
279       // Skip handle nodes as they can't usefully be combined and confuse the
280       // zero-use deletion strategy.
281       if (N->getOpcode() == ISD::HANDLENODE)
282         return;
283 
284       if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
285         return;
286 
287       if (IsCandidateForPruning)
288         ConsiderForPruning(N);
289 
290       if (N->getCombinerWorklistIndex() < 0) {
291         N->setCombinerWorklistIndex(Worklist.size());
292         Worklist.push_back(N);
293       }
294     }
295 
296     /// Remove all instances of N from the worklist.
297     void removeFromWorklist(SDNode *N) {
298       PruningList.remove(N);
299       StoreRootCountMap.erase(N);
300 
301       int WorklistIndex = N->getCombinerWorklistIndex();
302       // If not in the worklist, the index might be -1 or -2 (was combined
303       // before). As the node gets deleted anyway, there's no need to update
304       // the index.
305       if (WorklistIndex < 0)
306         return; // Not in the worklist.
307 
308       // Null out the entry rather than erasing it to avoid a linear operation.
309       Worklist[WorklistIndex] = nullptr;
310       N->setCombinerWorklistIndex(-1);
311     }
312 
313     void deleteAndRecombine(SDNode *N);
314     bool recursivelyDeleteUnusedNodes(SDNode *N);
315 
316     /// Replaces all uses of the results of one DAG node with new values.
317     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
318                       bool AddTo = true);
319 
320     /// Replaces all uses of the results of one DAG node with new values.
321     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
322       return CombineTo(N, &Res, 1, AddTo);
323     }
324 
325     /// Replaces all uses of the results of one DAG node with new values.
326     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
327                       bool AddTo = true) {
328       SDValue To[] = { Res0, Res1 };
329       return CombineTo(N, To, 2, AddTo);
330     }
331 
332     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
333 
334   private:
335     unsigned MaximumLegalStoreInBits;
336 
337     /// Check the specified integer node value to see if it can be simplified or
338     /// if things it uses can be simplified by bit propagation.
339     /// If so, return true.
340     bool SimplifyDemandedBits(SDValue Op) {
341       unsigned BitWidth = Op.getScalarValueSizeInBits();
342       APInt DemandedBits = APInt::getAllOnes(BitWidth);
343       return SimplifyDemandedBits(Op, DemandedBits);
344     }
345 
346     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
347       EVT VT = Op.getValueType();
348       APInt DemandedElts = VT.isFixedLengthVector()
349                                ? APInt::getAllOnes(VT.getVectorNumElements())
350                                : APInt(1, 1);
351       return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
352     }
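    // Example use (illustrative): when combining (and X, 0xFF), a caller
    // demands only the low 8 bits of X:
    //   APInt Demanded = APInt::getLowBitsSet(X.getScalarValueSizeInBits(), 8);
    //   if (SimplifyDemandedBits(X, Demanded))
    //     return SDValue(N, 0); // X was simplified in place.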
353 
354     /// Check the specified vector node value to see if it can be simplified or
355     /// if things it uses can be simplified as it only uses some of the
356     /// elements. If so, return true.
357     bool SimplifyDemandedVectorElts(SDValue Op) {
358       // TODO: For now just pretend it cannot be simplified.
359       if (Op.getValueType().isScalableVector())
360         return false;
361 
362       unsigned NumElts = Op.getValueType().getVectorNumElements();
363       APInt DemandedElts = APInt::getAllOnes(NumElts);
364       return SimplifyDemandedVectorElts(Op, DemandedElts);
365     }
366 
367     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
368                               const APInt &DemandedElts,
369                               bool AssumeSingleUse = false);
370     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
371                                     bool AssumeSingleUse = false);
372 
373     bool CombineToPreIndexedLoadStore(SDNode *N);
374     bool CombineToPostIndexedLoadStore(SDNode *N);
375     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
376     bool SliceUpLoad(SDNode *N);
377 
378     // Looks up the chain to find a unique (unaliased) store feeding the passed
379     // load. If no such store is found, returns nullptr.
380     // Note: This will look past a CALLSEQ_START if the load is chained to it,
381     //       so that it can find stack stores for byval params.
382     StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
384     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
385     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
386     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
387 
388     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
389     ///   load.
390     ///
391     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
392     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
393     /// \param EltNo index of the vector element to load.
394     /// \param OriginalLoad load that EVE came from to be replaced.
395     /// \returns EVE on success, SDValue() on failure.
396     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
397                                          SDValue EltNo,
398                                          LoadSDNode *OriginalLoad);
399     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
400     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
401     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
402     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
403     SDValue PromoteIntBinOp(SDValue Op);
404     SDValue PromoteIntShiftOp(SDValue Op);
405     SDValue PromoteExtend(SDValue Op);
406     bool PromoteLoad(SDValue Op);
407 
408     SDValue foldShiftToAvg(SDNode *N);
409 
410     SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
411                                 SDValue RHS, SDValue True, SDValue False,
412                                 ISD::CondCode CC);
413 
414     /// Call the node-specific routine that knows how to fold each
415     /// particular type of node. If that doesn't do anything, try the
416     /// target-specific DAG combines.
417     SDValue combine(SDNode *N);
418 
419     // Visitation implementation - Implement dag node combining for different
420     // node types.  The semantics are as follows:
421     // Return Value:
422     //   SDValue.getNode() == 0 - No change was made
423     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
424     //   otherwise              - N should be replaced by the returned Operand.
425     //
426     SDValue visitTokenFactor(SDNode *N);
427     SDValue visitMERGE_VALUES(SDNode *N);
428     SDValue visitADD(SDNode *N);
429     SDValue visitADDLike(SDNode *N);
430     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
431     SDValue visitSUB(SDNode *N);
432     SDValue visitADDSAT(SDNode *N);
433     SDValue visitSUBSAT(SDNode *N);
434     SDValue visitADDC(SDNode *N);
435     SDValue visitADDO(SDNode *N);
436     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437     SDValue visitSUBC(SDNode *N);
438     SDValue visitSUBO(SDNode *N);
439     SDValue visitADDE(SDNode *N);
440     SDValue visitUADDO_CARRY(SDNode *N);
441     SDValue visitSADDO_CARRY(SDNode *N);
442     SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443                                  SDNode *N);
444     SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445                                  SDNode *N);
446     SDValue visitSUBE(SDNode *N);
447     SDValue visitUSUBO_CARRY(SDNode *N);
448     SDValue visitSSUBO_CARRY(SDNode *N);
449     template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450     SDValue visitMULFIX(SDNode *N);
451     SDValue useDivRem(SDNode *N);
452     SDValue visitSDIV(SDNode *N);
453     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454     SDValue visitUDIV(SDNode *N);
455     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456     SDValue visitREM(SDNode *N);
457     SDValue visitMULHU(SDNode *N);
458     SDValue visitMULHS(SDNode *N);
459     SDValue visitAVG(SDNode *N);
460     SDValue visitABD(SDNode *N);
461     SDValue visitSMUL_LOHI(SDNode *N);
462     SDValue visitUMUL_LOHI(SDNode *N);
463     SDValue visitMULO(SDNode *N);
464     SDValue visitIMINMAX(SDNode *N);
465     SDValue visitAND(SDNode *N);
466     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467     SDValue visitOR(SDNode *N);
468     SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469     SDValue visitXOR(SDNode *N);
470     SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471     SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472     SDValue visitSHL(SDNode *N);
473     SDValue visitSRA(SDNode *N);
474     SDValue visitSRL(SDNode *N);
475     SDValue visitFunnelShift(SDNode *N);
476     SDValue visitSHLSAT(SDNode *N);
477     SDValue visitRotate(SDNode *N);
478     SDValue visitABS(SDNode *N);
479     SDValue visitBSWAP(SDNode *N);
480     SDValue visitBITREVERSE(SDNode *N);
481     SDValue visitCTLZ(SDNode *N);
482     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483     SDValue visitCTTZ(SDNode *N);
484     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485     SDValue visitCTPOP(SDNode *N);
486     SDValue visitSELECT(SDNode *N);
487     SDValue visitVSELECT(SDNode *N);
488     SDValue visitVP_SELECT(SDNode *N);
489     SDValue visitSELECT_CC(SDNode *N);
490     SDValue visitSETCC(SDNode *N);
491     SDValue visitSETCCCARRY(SDNode *N);
492     SDValue visitSIGN_EXTEND(SDNode *N);
493     SDValue visitZERO_EXTEND(SDNode *N);
494     SDValue visitANY_EXTEND(SDNode *N);
495     SDValue visitAssertExt(SDNode *N);
496     SDValue visitAssertAlign(SDNode *N);
497     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498     SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499     SDValue visitTRUNCATE(SDNode *N);
500     SDValue visitTRUNCATE_USAT_U(SDNode *N);
501     SDValue visitBITCAST(SDNode *N);
502     SDValue visitFREEZE(SDNode *N);
503     SDValue visitBUILD_PAIR(SDNode *N);
504     SDValue visitFADD(SDNode *N);
505     SDValue visitVP_FADD(SDNode *N);
506     SDValue visitVP_FSUB(SDNode *N);
507     SDValue visitSTRICT_FADD(SDNode *N);
508     SDValue visitFSUB(SDNode *N);
509     SDValue visitFMUL(SDNode *N);
510     template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511     SDValue visitFMAD(SDNode *N);
512     SDValue visitFDIV(SDNode *N);
513     SDValue visitFREM(SDNode *N);
514     SDValue visitFSQRT(SDNode *N);
515     SDValue visitFCOPYSIGN(SDNode *N);
516     SDValue visitFPOW(SDNode *N);
517     SDValue visitFCANONICALIZE(SDNode *N);
518     SDValue visitSINT_TO_FP(SDNode *N);
519     SDValue visitUINT_TO_FP(SDNode *N);
520     SDValue visitFP_TO_SINT(SDNode *N);
521     SDValue visitFP_TO_UINT(SDNode *N);
522     SDValue visitXROUND(SDNode *N);
523     SDValue visitFP_ROUND(SDNode *N);
524     SDValue visitFP_EXTEND(SDNode *N);
525     SDValue visitFNEG(SDNode *N);
526     SDValue visitFABS(SDNode *N);
527     SDValue visitFCEIL(SDNode *N);
528     SDValue visitFTRUNC(SDNode *N);
529     SDValue visitFFREXP(SDNode *N);
530     SDValue visitFFLOOR(SDNode *N);
531     SDValue visitFMinMax(SDNode *N);
532     SDValue visitBRCOND(SDNode *N);
533     SDValue visitBR_CC(SDNode *N);
534     SDValue visitLOAD(SDNode *N);
535 
536     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
537     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
538     SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
539 
540     bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
541 
542     SDValue visitSTORE(SDNode *N);
543     SDValue visitATOMIC_STORE(SDNode *N);
544     SDValue visitLIFETIME_END(SDNode *N);
545     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
546     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
547     SDValue visitBUILD_VECTOR(SDNode *N);
548     SDValue visitCONCAT_VECTORS(SDNode *N);
549     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
550     SDValue visitVECTOR_SHUFFLE(SDNode *N);
551     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
552     SDValue visitINSERT_SUBVECTOR(SDNode *N);
553     SDValue visitVECTOR_COMPRESS(SDNode *N);
554     SDValue visitMLOAD(SDNode *N);
555     SDValue visitMSTORE(SDNode *N);
556     SDValue visitMGATHER(SDNode *N);
557     SDValue visitMSCATTER(SDNode *N);
558     SDValue visitMHISTOGRAM(SDNode *N);
559     SDValue visitVPGATHER(SDNode *N);
560     SDValue visitVPSCATTER(SDNode *N);
561     SDValue visitVP_STRIDED_LOAD(SDNode *N);
562     SDValue visitVP_STRIDED_STORE(SDNode *N);
563     SDValue visitFP_TO_FP16(SDNode *N);
564     SDValue visitFP16_TO_FP(SDNode *N);
565     SDValue visitFP_TO_BF16(SDNode *N);
566     SDValue visitBF16_TO_FP(SDNode *N);
567     SDValue visitVECREDUCE(SDNode *N);
568     SDValue visitVPOp(SDNode *N);
569     SDValue visitGET_FPENV_MEM(SDNode *N);
570     SDValue visitSET_FPENV_MEM(SDNode *N);
571 
572     template <class MatchContextClass>
573     SDValue visitFADDForFMACombine(SDNode *N);
574     template <class MatchContextClass>
575     SDValue visitFSUBForFMACombine(SDNode *N);
576     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
577 
578     SDValue XformToShuffleWithZero(SDNode *N);
579     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
580                                                     const SDLoc &DL,
581                                                     SDNode *N,
582                                                     SDValue N0,
583                                                     SDValue N1);
584     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
585                                       SDValue N1, SDNodeFlags Flags);
586     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
587                            SDValue N1, SDNodeFlags Flags);
588     SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
589                                  EVT VT, SDValue N0, SDValue N1,
590                                  SDNodeFlags Flags = SDNodeFlags());
591 
592     SDValue visitShiftByConstant(SDNode *N);
593 
594     SDValue foldSelectOfConstants(SDNode *N);
595     SDValue foldVSelectOfConstants(SDNode *N);
596     SDValue foldBinOpIntoSelect(SDNode *BO);
597     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
598     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
599     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
600     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
601                              SDValue N2, SDValue N3, ISD::CondCode CC,
602                              bool NotExtCompare = false);
603     SDValue convertSelectOfFPConstantsToLoadOffset(
604         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
605         ISD::CondCode CC);
606     SDValue foldSignChangeInBitcast(SDNode *N);
607     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
608                                    SDValue N2, SDValue N3, ISD::CondCode CC);
609     SDValue foldSelectOfBinops(SDNode *N);
610     SDValue foldSextSetcc(SDNode *N);
611     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
612                               const SDLoc &DL);
613     SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
614     SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
615     SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
616                             SDValue False, ISD::CondCode CC, const SDLoc &DL);
617     SDValue unfoldMaskedMerge(SDNode *N);
618     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
619     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
620                           const SDLoc &DL, bool foldBooleans);
621     SDValue rebuildSetCC(SDValue N);
622 
623     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
624                            SDValue &CC, bool MatchStrict = false) const;
625     bool isOneUseSetCC(SDValue N) const;
626 
627     SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
628     SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
629 
630     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
631                                          unsigned HiOp);
632     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
633     SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
634                                  const TargetLowering &TLI);
635 
636     SDValue CombineExtLoad(SDNode *N);
637     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
638     SDValue combineRepeatedFPDivisors(SDNode *N);
639     SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
640     SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
641     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
642     SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
643     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
644     SDValue BuildSDIV(SDNode *N);
645     SDValue BuildSDIVPow2(SDNode *N);
646     SDValue BuildUDIV(SDNode *N);
647     SDValue BuildSREMPow2(SDNode *N);
648     SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
649     SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
650                           bool KnownNeverZero = false,
651                           bool InexpensiveOnly = false,
652                           std::optional<EVT> OutVT = std::nullopt);
653     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
654     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
655     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
656     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
657     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
658                                 SDNodeFlags Flags, bool Reciprocal);
659     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
660                                 SDNodeFlags Flags, bool Reciprocal);
661     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
662                                bool DemandHighBits = true);
663     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
664     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
665                               SDValue InnerPos, SDValue InnerNeg, bool HasPos,
666                               unsigned PosOpcode, unsigned NegOpcode,
667                               const SDLoc &DL);
668     SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
669                               SDValue InnerPos, SDValue InnerNeg, bool HasPos,
670                               unsigned PosOpcode, unsigned NegOpcode,
671                               const SDLoc &DL);
672     SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
673     SDValue MatchLoadCombine(SDNode *N);
674     SDValue mergeTruncStores(StoreSDNode *N);
675     SDValue reduceLoadWidth(SDNode *N);
676     SDValue ReduceLoadOpStoreWidth(SDNode *N);
677     SDValue splitMergedValStore(StoreSDNode *ST);
678     SDValue TransformFPLoadStorePair(SDNode *N);
679     SDValue convertBuildVecZextToZext(SDNode *N);
680     SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
681     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
682     SDValue reduceBuildVecTruncToBitCast(SDNode *N);
683     SDValue reduceBuildVecToShuffle(SDNode *N);
684     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
685                                   ArrayRef<int> VectorMask, SDValue VecIn1,
686                                   SDValue VecIn2, unsigned LeftIdx,
687                                   bool DidSplitVec);
688     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
689 
690     /// Walk up chain skipping non-aliasing memory nodes,
691     /// looking for aliasing nodes and adding them to the Aliases vector.
692     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
693                           SmallVectorImpl<SDValue> &Aliases);
694 
695     /// Return true if there is any possibility that the two addresses overlap.
696     bool mayAlias(SDNode *Op0, SDNode *Op1) const;
697 
698     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
699     /// chain (aliasing node).
700     SDValue FindBetterChain(SDNode *N, SDValue Chain);
701 
702     /// Try to replace a store and any possibly adjacent stores on
703     /// consecutive chains with better chains. Return true only if St is
704     /// replaced.
705     ///
706     /// Notice that other chains may still be replaced even if the function
707     /// returns false.
708     bool findBetterNeighborChains(StoreSDNode *St);
709 
710     // Helper for findBetterNeighborChains. Walk up the store chain, adding
711     // additional chained stores that do not overlap and can be parallelized.
712     bool parallelizeChainedStores(StoreSDNode *St);
713 
714     /// Holds a pointer to an LSBaseSDNode as well as information on where it
715     /// is located in a sequence of memory operations connected by a chain.
716     struct MemOpLink {
717       // Ptr to the mem node.
718       LSBaseSDNode *MemNode;
719 
720       // Offset from the base ptr.
721       int64_t OffsetFromBase;
722 
723       MemOpLink(LSBaseSDNode *N, int64_t Offset)
724           : MemNode(N), OffsetFromBase(Offset) {}
725     };
726 
727     // Classify the origin of a stored value.
728     enum class StoreSource { Unknown, Constant, Extract, Load };
729     StoreSource getStoreSource(SDValue StoreVal) {
730       switch (StoreVal.getOpcode()) {
731       case ISD::Constant:
732       case ISD::ConstantFP:
733         return StoreSource::Constant;
734       case ISD::BUILD_VECTOR:
735         if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
736             ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
737           return StoreSource::Constant;
738         return StoreSource::Unknown;
739       case ISD::EXTRACT_VECTOR_ELT:
740       case ISD::EXTRACT_SUBVECTOR:
741         return StoreSource::Extract;
742       case ISD::LOAD:
743         return StoreSource::Load;
744       default:
745         return StoreSource::Unknown;
746       }
747     }
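    // For example:
    //   store (i32 42), p                  --> StoreSource::Constant
    //   store (extract_vector_elt v, i), p --> StoreSource::Extract
    //   store (load q), p                  --> StoreSource::Load
    //   store (add x, y), p                --> StoreSource::Unknown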
748 
749     /// This is a helper function for visitMUL to check the profitability
750     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
751     /// MulNode is the original multiply, AddNode is (add x, c1),
752     /// and ConstNode is c2.
753     bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
754                                      SDValue ConstNode);
755 
756     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
757     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
758     /// the type of the loaded value to be extended.
759     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
760                           EVT LoadResultTy, EVT &ExtVT);
761 
762     /// Helper function to calculate whether the given Load/Store can have its
763     /// width reduced to ExtVT.
764     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
765                            EVT &MemVT, unsigned ShAmt = 0);
766 
767     /// Used by BackwardsPropagateMask to find suitable loads.
768     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
769                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
770                            ConstantSDNode *Mask, SDNode *&NodeToMask);
771     /// Attempt to propagate a given AND node back to load leaves so that they
772     /// can be combined into narrow loads.
773     bool BackwardsPropagateMask(SDNode *N);
774 
775     /// Helper function for mergeConsecutiveStores which merges the component
776     /// store chains.
777     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
778                                 unsigned NumStores);
779 
780     /// Helper function for mergeConsecutiveStores which checks if all the store
781     /// nodes have the same underlying object. We can still reuse the first
782     /// store's pointer info if all the stores are from the same object.
783     bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
784 
785     /// This is a helper function for mergeConsecutiveStores. When the source
786     /// elements of the consecutive stores are all constants or all extracted
787     /// vector elements, try to merge them into one larger store introducing
788     /// bitcasts if necessary.  \return True if a merged store was created.
789     bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
790                                          EVT MemVT, unsigned NumStores,
791                                          bool IsConstantSrc, bool UseVector,
792                                          bool UseTrunc);
793 
794     /// This is a helper function for mergeConsecutiveStores. Stores that
795     /// potentially may be merged with St are placed in StoreNodes. On success,
796     /// returns a chain predecessor to all store candidates.
797     SDNode *getStoreMergeCandidates(StoreSDNode *St,
798                                     SmallVectorImpl<MemOpLink> &StoreNodes);
799 
800     /// Helper function for mergeConsecutiveStores. Checks if candidate stores
801     /// have indirect dependency through their operands. RootNode is the
802     /// predecessor to all stores calculated by getStoreMergeCandidates and is
803     /// used to prune the dependency check. \return True if safe to merge.
804     bool checkMergeStoreCandidatesForDependencies(
805         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
806         SDNode *RootNode);
807 
808     /// This is a helper function for mergeConsecutiveStores. Given a list of
809     /// store candidates, find the first N that are consecutive in memory.
810     /// Returns 0 if there are not at least 2 consecutive stores to try merging.
811     unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
812                                   int64_t ElementSizeBytes) const;
813 
814     /// This is a helper function for mergeConsecutiveStores. It is used for
815     /// store chains that are composed entirely of constant values.
816     bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
817                                   unsigned NumConsecutiveStores,
818                                   EVT MemVT, SDNode *Root, bool AllowVectors);
819 
820     /// This is a helper function for mergeConsecutiveStores. It is used for
821     /// store chains that are composed entirely of extracted vector elements.
822     /// When extracting multiple vector elements, try to store them in one
823     /// vector store rather than a sequence of scalar stores.
824     bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
825                                  unsigned NumConsecutiveStores, EVT MemVT,
826                                  SDNode *Root);
827 
828     /// This is a helper function for mergeConsecutiveStores. It is used for
829     /// store chains that are composed entirely of loaded values.
830     bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
831                               unsigned NumConsecutiveStores, EVT MemVT,
832                               SDNode *Root, bool AllowVectors,
833                               bool IsNonTemporalStore, bool IsNonTemporalLoad);
834 
835     /// Merge consecutive store operations into a wide store.
836     /// This optimization uses wide integers or vectors when possible.
837     /// \return true if stores were merged.
838     bool mergeConsecutiveStores(StoreSDNode *St);
839 
840     /// Try to transform a truncation where C is a constant:
841     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
842     ///
843     /// \p N needs to be a truncation and its first operand an AND. Other
844     /// requirements are checked by the function (e.g. that the trunc is
845     /// single-use); if they are not met, an empty SDValue is returned.
846     SDValue distributeTruncateThroughAnd(SDNode *N);
847 
848     /// Helper function to determine whether the target supports the operation
849     /// given by \p Opcode for type \p VT; that is, whether the operation is
850     /// legal or custom before legalizing operations, and whether it is legal
851     /// (but not custom) after legalization.
852     bool hasOperation(unsigned Opcode, EVT VT) {
853       return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
854     }
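    // Typical use (illustrative): gate a fold on target support, e.g.
    //   if (hasOperation(ISD::USUBSAT, VT))
    //     ... form the saturating subtract directly ...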
855 
856   public:
857     /// Runs the dag combiner on all nodes in the worklist.
858     void Run(CombineLevel AtLevel);
859 
860     SelectionDAG &getDAG() const { return DAG; }
861 
862     /// Convenience wrapper around TargetLowering::getShiftAmountTy.
863     EVT getShiftAmountTy(EVT LHSTy) {
864       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
865     }
866 
867     /// This method returns true if we are running before type legalization or
868     /// if the specified VT is legal.
869     bool isTypeLegal(const EVT &VT) {
870       if (!LegalTypes) return true;
871       return TLI.isTypeLegal(VT);
872     }
873 
874     /// Convenience wrapper around TargetLowering::getSetCCResultType
875     EVT getSetCCResultType(EVT VT) const {
876       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
877     }
878 
879     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
880                          SDValue OrigLoad, SDValue ExtLoad,
881                          ISD::NodeType ExtType);
882   };
883 
884 /// This class is a DAGUpdateListener that removes any deleted
885 /// nodes from the worklist.
886 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
887   DAGCombiner &DC;
888 
889 public:
890   explicit WorklistRemover(DAGCombiner &dc)
891     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
892 
893   void NodeDeleted(SDNode *N, SDNode *E) override {
894     DC.removeFromWorklist(N);
895   }
896 };
897 
898 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
899   DAGCombiner &DC;
900 
901 public:
902   explicit WorklistInserter(DAGCombiner &dc)
903       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
904 
905   // FIXME: Ideally we could add N to the worklist, but this causes exponential
906   //        compile time costs in large DAGs, e.g. Halide.
907   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
908 };
909 
910 } // end anonymous namespace
911 
912 //===----------------------------------------------------------------------===//
913 //  TargetLowering::DAGCombinerInfo implementation
914 //===----------------------------------------------------------------------===//
915 
916 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
917   ((DAGCombiner*)DC)->AddToWorklist(N);
918 }
919 
920 SDValue TargetLowering::DAGCombinerInfo::
921 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
922   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
923 }
924 
925 SDValue TargetLowering::DAGCombinerInfo::
926 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
927   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
928 }
929 
930 SDValue TargetLowering::DAGCombinerInfo::
931 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
932   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
933 }
934 
935 bool TargetLowering::DAGCombinerInfo::
936 recursivelyDeleteUnusedNodes(SDNode *N) {
937   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
938 }
939 
940 void TargetLowering::DAGCombinerInfo::
941 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
942   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
943 }
944 
945 //===----------------------------------------------------------------------===//
946 // Helper Functions
947 //===----------------------------------------------------------------------===//
948 
949 void DAGCombiner::deleteAndRecombine(SDNode *N) {
950   removeFromWorklist(N);
951 
952   // If the operands of this node are only used by the node, they will now be
953   // dead. Make sure to re-visit them and recursively delete dead nodes.
954   for (const SDValue &Op : N->ops())
955     // For an operand generating multiple values, one of the values may
956     // become dead allowing further simplification (e.g. split index
957     // arithmetic from an indexed load).
958     if (Op->hasOneUse() || Op->getNumValues() > 1)
959       AddToWorklist(Op.getNode());
960 
961   DAG.DeleteNode(N);
962 }
963 
964 // APInts must be the same size for most operations; this helper
965 // function zero extends the shorter of the pair so that they match.
966 // We provide an Offset so that we can create bitwidths that won't overflow.
967 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
968   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
969   LHS = LHS.zext(Bits);
970   RHS = RHS.zext(Bits);
971 }
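// For example (illustrative):
//   APInt A(8, 0xFF), B(16, 0x100);
//   zeroExtendToMatch(A, B);               // both are now 16 bits wide
//   zeroExtendToMatch(A, B, /*Offset=*/1); // both widened to 17 bits, so a
//                                          // subsequent add cannot overflow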
972 
973 // Return true if this node is a setcc, or is a select_cc
974 // that selects between the target values used for true and false, making it
975 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
976 // the appropriate nodes based on the type of node we are checking. This
977 // simplifies life a bit for the callers.
978 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
979                                     SDValue &CC, bool MatchStrict) const {
980   if (N.getOpcode() == ISD::SETCC) {
981     LHS = N.getOperand(0);
982     RHS = N.getOperand(1);
983     CC  = N.getOperand(2);
984     return true;
985   }
986 
987   if (MatchStrict &&
988       (N.getOpcode() == ISD::STRICT_FSETCC ||
989        N.getOpcode() == ISD::STRICT_FSETCCS)) {
990     LHS = N.getOperand(1);
991     RHS = N.getOperand(2);
992     CC  = N.getOperand(3);
993     return true;
994   }
995 
996   if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
997       !TLI.isConstFalseVal(N.getOperand(3)))
998     return false;
999 
1000   if (TLI.getBooleanContents(N.getValueType()) ==
1001       TargetLowering::UndefinedBooleanContent)
1002     return false;
1003 
1004   LHS = N.getOperand(0);
1005   RHS = N.getOperand(1);
1006   CC  = N.getOperand(4);
1007   return true;
1008 }
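// For example, with ZeroOrOneBooleanContent the node
//   (select_cc LHS, RHS, 1, 0, cc)
// is equivalent to (setcc LHS, RHS, cc), so LHS, RHS and CC are taken from
// operands 0, 1 and 4 respectively.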
1009 
1010 /// Return true if this is a SetCC-equivalent operation with only one use.
1011 /// If this is true, it allows the users to invert the operation for free when
1012 /// it is profitable to do so.
1013 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1014   SDValue N0, N1, N2;
1015   if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1016     return true;
1017   return false;
1018 }
1019 
1020 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1021   if (!ScalarTy.isSimple())
1022     return false;
1023 
1024   uint64_t MaskForTy = 0ULL;
1025   switch (ScalarTy.getSimpleVT().SimpleTy) {
1026   case MVT::i8:
1027     MaskForTy = 0xFFULL;
1028     break;
1029   case MVT::i16:
1030     MaskForTy = 0xFFFFULL;
1031     break;
1032   case MVT::i32:
1033     MaskForTy = 0xFFFFFFFFULL;
1034     break;
1035   default:
1036     return false;
1038   }
1039 
1040   APInt Val;
1041   if (ISD::isConstantSplatVector(N, Val))
1042     return Val.getLimitedValue() == MaskForTy;
1043 
1044   return false;
1045 }
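// For example, a v4i32 splat of 0x0000FFFF is the mask for ScalarTy ==
// MVT::i16, whereas a splat of 0x00FF00FF matches none of the scalar types
// handled above and returns false.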
1046 
1047 // Determines if it is a constant integer or a splat/build vector of constant
1048 // integers (and undefs).
1049 // Do not permit build vector implicit truncation.
1050 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1051   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1052     return !(Const->isOpaque() && NoOpaques);
1053   if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1054     return false;
1055   unsigned BitWidth = N.getScalarValueSizeInBits();
1056   for (const SDValue &Op : N->op_values()) {
1057     if (Op.isUndef())
1058       continue;
1059     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1060     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1061         (Const->isOpaque() && NoOpaques))
1062       return false;
1063   }
1064   return true;
1065 }
1066 
1067 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1068 // with undefs.
1069 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1070   if (V.getOpcode() != ISD::BUILD_VECTOR)
1071     return false;
1072   return isConstantOrConstantVector(V, NoOpaques) ||
1073          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1074 }
1075 
1076 // Determine if this is an indexed load with a splittable (non-opaque) index.
1077 static bool canSplitIdx(LoadSDNode *LD) {
1078   return MaySplitLoadIndex &&
1079          (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1080           !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1081 }
1082 
1083 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1084                                                              const SDLoc &DL,
1085                                                              SDNode *N,
1086                                                              SDValue N0,
1087                                                              SDValue N1) {
1088   // Currently this only tries to ensure we don't undo the GEP splits done by
1089   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1090   // we check if the following transformation would be problematic:
1091   // (load/store (add, (add, x, offset1), offset2)) ->
1092   // (load/store (add, x, offset1+offset2)).
1093 
1094   // (load/store (add, (add, x, y), offset2)) ->
1095   // (load/store (add, (add, x, offset2), y)).
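  // Concrete example of the first pattern (illustrative): if CodeGenPrepare
  // split a large GEP offset so the address is computed as
  // (add (add x, 8000), 80) because the combined offset 8080 is not legal for
  // the target's addressing modes, refolding it to (add x, 8080) here would
  // undo that split and recreate the illegal addressing mode.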
1096 
1097   if (N0.getOpcode() != ISD::ADD)
1098     return false;
1099 
1100   // Check for vscale addressing modes.
1101   // (load/store (add/sub (add x, y), vscale))
1102   // (load/store (add/sub (add x, y), (lsl vscale, C)))
1103   // (load/store (add/sub (add x, y), (mul vscale, C)))
1104   if ((N1.getOpcode() == ISD::VSCALE ||
1105        ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1106         N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1107         isa<ConstantSDNode>(N1.getOperand(1)))) &&
1108       N1.getValueType().getFixedSizeInBits() <= 64) {
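    // Decode the scalable offset: a bare vscale contributes its multiplier
    // directly, (shl vscale, C) contributes vscale * (1 << C), and
    // (mul vscale, C) contributes vscale * C. For SUB the offset is negated
    // below.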
1109     int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1110                                  ? N1.getConstantOperandVal(0)
1111                                  : (N1.getOperand(0).getConstantOperandVal(0) *
1112                                     (N1.getOpcode() == ISD::SHL
1113                                          ? (1LL << N1.getConstantOperandVal(1))
1114                                          : N1.getConstantOperandVal(1)));
1115     if (Opc == ISD::SUB)
1116       ScalableOffset = -ScalableOffset;
1117     if (all_of(N->users(), [&](SDNode *Node) {
1118           if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1119               LoadStore && LoadStore->getBasePtr().getNode() == N) {
1120             TargetLoweringBase::AddrMode AM;
1121             AM.HasBaseReg = true;
1122             AM.ScalableOffset = ScalableOffset;
1123             EVT VT = LoadStore->getMemoryVT();
1124             unsigned AS = LoadStore->getAddressSpace();
1125             Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1126             return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1127                                              AS);
1128           }
1129           return false;
1130         }))
1131       return true;
1132   }
1133 
1134   if (Opc != ISD::ADD)
1135     return false;
1136 
1137   auto *C2 = dyn_cast<ConstantSDNode>(N1);
1138   if (!C2)
1139     return false;
1140 
1141   const APInt &C2APIntVal = C2->getAPIntValue();
1142   if (C2APIntVal.getSignificantBits() > 64)
1143     return false;
1144 
1145   if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1146     if (N0.hasOneUse())
1147       return false;
1148 
1149     const APInt &C1APIntVal = C1->getAPIntValue();
1150     const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1151     if (CombinedValueIntVal.getSignificantBits() > 64)
1152       return false;
1153     const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1154 
1155     for (SDNode *Node : N->users()) {
1156       if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1157         // Is x[offset2] already not a legal addressing mode? If so then
1158         // reassociating the constants breaks nothing (we test offset2 because
1159         // that's the one we hope to fold into the load or store).
1160         TargetLoweringBase::AddrMode AM;
1161         AM.HasBaseReg = true;
1162         AM.BaseOffs = C2APIntVal.getSExtValue();
1163         EVT VT = LoadStore->getMemoryVT();
1164         unsigned AS = LoadStore->getAddressSpace();
1165         Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1166         if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1167           continue;
1168 
1169         // Would x[offset1+offset2] still be a legal addressing mode?
1170         AM.BaseOffs = CombinedValue;
1171         if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1172           return true;
1173       }
1174     }
1175   } else {
1176     if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1177       if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1178         return false;
1179 
1180     for (SDNode *Node : N->users()) {
1181       auto *LoadStore = dyn_cast<MemSDNode>(Node);
1182       if (!LoadStore)
1183         return false;
1184 
1185       // Is x[offset2] a legal addressing mode? If so then reassociating
1186       // the constants breaks the address pattern.
1187       TargetLoweringBase::AddrMode AM;
1188       AM.HasBaseReg = true;
1189       AM.BaseOffs = C2APIntVal.getSExtValue();
1190       EVT VT = LoadStore->getMemoryVT();
1191       unsigned AS = LoadStore->getAddressSpace();
1192       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1193       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1194         return false;
1195     }
1196     return true;
1197   }
1198 
1199   return false;
1200 }
1201 
1202 /// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1203 /// \p N0 is the same kind of operation as \p Opc.
1204 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1205                                                SDValue N0, SDValue N1,
1206                                                SDNodeFlags Flags) {
1207   EVT VT = N0.getValueType();
1208 
1209   if (N0.getOpcode() != Opc)
1210     return SDValue();
1211 
1212   SDValue N00 = N0.getOperand(0);
1213   SDValue N01 = N0.getOperand(1);
1214 
1215   if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1216     SDNodeFlags NewFlags;
1217     if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1218         Flags.hasNoUnsignedWrap())
1219       NewFlags |= SDNodeFlags::NoUnsignedWrap;
1220 
1221     if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1222       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1223       if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1224         NewFlags.setDisjoint(Flags.hasDisjoint() &&
1225                              N0->getFlags().hasDisjoint());
1226         return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1227       }
1228       return SDValue();
1229     }
1230     if (TLI.isReassocProfitable(DAG, N0, N1)) {
1231       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1232       //              iff (op x, c1) has one use
1233       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1234       return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1235     }
1236   }
1237 
1238   // Check for repeated operand logic simplifications.
1239   if (Opc == ISD::AND || Opc == ISD::OR) {
1240     // (N00 & N01) & N00 --> N00 & N01
1241     // (N00 & N01) & N01 --> N00 & N01
1242     // (N00 | N01) | N00 --> N00 | N01
1243     // (N00 | N01) | N01 --> N00 | N01
1244     if (N1 == N00 || N1 == N01)
1245       return N0;
1246   }
1247   if (Opc == ISD::XOR) {
1248     // (N00 ^ N01) ^ N00 --> N01
1249     if (N1 == N00)
1250       return N01;
1251     // (N00 ^ N01) ^ N01 --> N00
1252     if (N1 == N01)
1253       return N00;
1254   }
1255 
1256   if (TLI.isReassocProfitable(DAG, N0, N1)) {
1257     if (N1 != N01) {
1258       // Reassociate if (op N00, N1) already exists.
1259       if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1260         // If (Op (Op N00, N1), N01) already exists, we need to stop
1261         // reassociating to avoid an infinite loop.
1262         if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1263           return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1264       }
1265     }
1266 
1267     if (N1 != N00) {
1268       // Reassociate if (op N01, N1) already exists.
1269       if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1270         // If (Op (Op N01, N1), N00) already exists, we need to stop
1271         // reassociating to avoid an infinite loop.
1272         if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1273           return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1274       }
1275     }
1276 
1277     // Reassociate the operands from (OR/AND (OR/AND (N00, N01)), N1) to
1278     // (OR/AND (OR/AND (N00, N1)), N01) when N00 and N1 are comparisons with the
1279     // same predicate, or to (OR/AND (OR/AND (N1, N01)), N00) when N01 and N1 are
1280     // comparisons with the same predicate. This enables optimizations such as
1281     // the following:
1282     // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1283     // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1284     if (Opc == ISD::AND || Opc == ISD::OR) {
1285       if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1286           N01->getOpcode() == ISD::SETCC) {
1287         ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1288         ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1289         ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1290         if (CC1 == CC00 && CC1 != CC01) {
1291           SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1292           return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1293         }
1294         if (CC1 == CC01 && CC1 != CC00) {
1295           SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1296           return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1297         }
1298       }
1299     }
1300   }
1301 
1302   return SDValue();
1303 }
1304 
1305 /// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1306 /// same kind of operation as \p Opc.
1307 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1308                                     SDValue N1, SDNodeFlags Flags) {
1309   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1310 
1311   // FP reassociation requires both the 'reassoc' and 'nsz' fast-math flags.
1312   if (N0.getValueType().isFloatingPoint() ||
1313       N1.getValueType().isFloatingPoint())
1314     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1315       return SDValue();
1316 
1317   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1318     return Combined;
1319   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1320     return Combined;
1321   return SDValue();
1322 }
1323 
1324 // Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1325 // Note that we only expect Flags to be passed from FP operations. For integer
1326 // operations they need to be dropped.
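     // e.g. (add (vecreduce_add x), (vecreduce_add y))
     //        -> (vecreduce_add (add x, y))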
1327 SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1328                                           const SDLoc &DL, EVT VT, SDValue N0,
1329                                           SDValue N1, SDNodeFlags Flags) {
1330   if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1331       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1332       N0->hasOneUse() && N1->hasOneUse() &&
1333       TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
1334       TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1335     SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1336     return DAG.getNode(RedOpc, DL, VT,
1337                        DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1338                                    N0.getOperand(0), N1.getOperand(0)));
1339   }
1340   return SDValue();
1341 }
1342 
1343 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1344                                bool AddTo) {
1345   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1346   ++NodesCombined;
1347   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1348              To[0].dump(&DAG);
1349              dbgs() << " and " << NumTo - 1 << " other values\n");
1350   for (unsigned i = 0, e = NumTo; i != e; ++i)
1351     assert((!To[i].getNode() ||
1352             N->getValueType(i) == To[i].getValueType()) &&
1353            "Cannot combine value to value of different type!");
1354 
1355   WorklistRemover DeadNodes(*this);
1356   DAG.ReplaceAllUsesWith(N, To);
1357   if (AddTo) {
1358     // Push the new nodes and any users onto the worklist
1359     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1360       if (To[i].getNode())
1361         AddToWorklistWithUsers(To[i].getNode());
1362     }
1363   }
1364 
1365   // Finally, if the node is now dead, remove it from the graph.  The node
1366   // may not be dead if the replacement process recursively simplified to
1367   // something else needing this node.
1368   if (N->use_empty())
1369     deleteAndRecombine(N);
1370   return SDValue(N, 0);
1371 }
1372 
1373 void DAGCombiner::
1374 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1375   // Replace the old value with the new one.
1376   ++NodesCombined;
1377   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1378              dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1379 
1380   // Replace all uses.
1381   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1382 
1383   // Push the new node and any (possibly new) users onto the worklist.
1384   AddToWorklistWithUsers(TLO.New.getNode());
1385 
1386   // Finally, if the node is now dead, remove it from the graph.
1387   recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1388 }
1389 
1390 /// Check the specified integer node value to see if it can be simplified or if
1391 /// things it uses can be simplified by bit propagation. If so, return true.
1392 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1393                                        const APInt &DemandedElts,
1394                                        bool AssumeSingleUse) {
1395   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1396   KnownBits Known;
1397   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1398                                 AssumeSingleUse))
1399     return false;
1400 
1401   // Revisit the node.
1402   AddToWorklist(Op.getNode());
1403 
1404   CommitTargetLoweringOpt(TLO);
1405   return true;
1406 }
1407 
1408 /// Check the specified vector node value to see if it can be simplified or
1409 /// if things it uses can be simplified as it only uses some of the elements.
1410 /// If so, return true.
1411 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1412                                              const APInt &DemandedElts,
1413                                              bool AssumeSingleUse) {
1414   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1415   APInt KnownUndef, KnownZero;
1416   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1417                                       TLO, 0, AssumeSingleUse))
1418     return false;
1419 
1420   // Revisit the node.
1421   AddToWorklist(Op.getNode());
1422 
1423   CommitTargetLoweringOpt(TLO);
1424   return true;
1425 }
1426 
1427 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1428   SDLoc DL(Load);
1429   EVT VT = Load->getValueType(0);
1430   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1431 
1432   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1433              Trunc.dump(&DAG); dbgs() << '\n');
1434 
1435   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1436   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1437 
1438   AddToWorklist(Trunc.getNode());
1439   recursivelyDeleteUnusedNodes(Load);
1440 }
1441 
1442 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1443   Replace = false;
1444   SDLoc DL(Op);
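       // A load is promoted by rebuilding it as an extending load of the wider
       // type; a non-extending load becomes an any-extending (EXTLOAD) one.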
1445   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1446     LoadSDNode *LD = cast<LoadSDNode>(Op);
1447     EVT MemVT = LD->getMemoryVT();
1448     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1449                                                       : LD->getExtensionType();
1450     Replace = true;
1451     return DAG.getExtLoad(ExtType, DL, PVT,
1452                           LD->getChain(), LD->getBasePtr(),
1453                           MemVT, LD->getMemOperand());
1454   }
1455 
1456   unsigned Opc = Op.getOpcode();
1457   switch (Opc) {
1458   default: break;
1459   case ISD::AssertSext:
1460     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1461       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1462     break;
1463   case ISD::AssertZext:
1464     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1465       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1466     break;
1467   case ISD::Constant: {
1468     unsigned ExtOpc =
1469       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1470     return DAG.getNode(ExtOpc, DL, PVT, Op);
1471   }
1472   }
1473 
1474   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1475     return SDValue();
1476   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1477 }
1478 
1479 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1480   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1481     return SDValue();
1482   EVT OldVT = Op.getValueType();
1483   SDLoc DL(Op);
1484   bool Replace = false;
1485   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1486   if (!NewOp.getNode())
1487     return SDValue();
1488   AddToWorklist(NewOp.getNode());
1489 
1490   if (Replace)
1491     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1492   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1493                      DAG.getValueType(OldVT));
1494 }
1495 
1496 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1497   EVT OldVT = Op.getValueType();
1498   SDLoc DL(Op);
1499   bool Replace = false;
1500   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1501   if (!NewOp.getNode())
1502     return SDValue();
1503   AddToWorklist(NewOp.getNode());
1504 
1505   if (Replace)
1506     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1507   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1508 }
1509 
1510 /// Promote the specified integer binary operation if the target indicates it is
1511 /// beneficial. e.g. on x86, it's usually better to promote i16 operations to
1512 /// i32 since i16 instructions are longer.
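     ///
     /// For example, assuming i32 is the desirable type, an i16 add may become:
     ///   (i16 add x, y) -> (i16 trunc (i32 add (i32 aext x), (i32 aext y)))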
1513 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1514   if (!LegalOperations)
1515     return SDValue();
1516 
1517   EVT VT = Op.getValueType();
1518   if (VT.isVector() || !VT.isInteger())
1519     return SDValue();
1520 
1521   // If operation type is 'undesirable', e.g. i16 on x86, consider
1522   // promoting it.
1523   unsigned Opc = Op.getOpcode();
1524   if (TLI.isTypeDesirableForOp(Opc, VT))
1525     return SDValue();
1526 
1527   EVT PVT = VT;
1528   // Ask the target whether it is a good idea to promote this operation and,
1529   // if so, what type to promote it to.
1530   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1531     assert(PVT != VT && "Don't know what type to promote to!");
1532 
1533     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1534 
1535     bool Replace0 = false;
1536     SDValue N0 = Op.getOperand(0);
1537     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1538 
1539     bool Replace1 = false;
1540     SDValue N1 = Op.getOperand(1);
1541     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1542     SDLoc DL(Op);
1543 
1544     SDValue RV =
1545         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1546 
1547     // We are always replacing N0/N1's use in N and only need additional
1548     // replacements if there are additional uses.
1549     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1550     //       (SDValue) here because the node may reference multiple values
1551     //       (for example, the chain value of a load node).
1552     Replace0 &= !N0->hasOneUse();
1553     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1554 
1555     // Combine Op here so it is preserved past replacements.
1556     CombineTo(Op.getNode(), RV);
1557 
1558     // If the operands have a use ordering, make sure we deal with the
1559     // predecessor first.
1560     if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1561       std::swap(N0, N1);
1562       std::swap(NN0, NN1);
1563     }
1564 
1565     if (Replace0) {
1566       AddToWorklist(NN0.getNode());
1567       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1568     }
1569     if (Replace1) {
1570       AddToWorklist(NN1.getNode());
1571       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1572     }
1573     return Op;
1574   }
1575   return SDValue();
1576 }
1577 
1578 /// Promote the specified integer shift operation if the target indicates it is
1579 /// beneficial. e.g. on x86, it's usually better to promote i16 operations to
1580 /// i32 since i16 instructions are longer.
1581 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1582   if (!LegalOperations)
1583     return SDValue();
1584 
1585   EVT VT = Op.getValueType();
1586   if (VT.isVector() || !VT.isInteger())
1587     return SDValue();
1588 
1589   // If operation type is 'undesirable', e.g. i16 on x86, consider
1590   // promoting it.
1591   unsigned Opc = Op.getOpcode();
1592   if (TLI.isTypeDesirableForOp(Opc, VT))
1593     return SDValue();
1594 
1595   EVT PVT = VT;
1596   // Ask the target whether it is a good idea to promote this operation and,
1597   // if so, what type to promote it to.
1598   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1599     assert(PVT != VT && "Don't know what type to promote to!");
1600 
1601     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1602 
1603     bool Replace = false;
1604     SDValue N0 = Op.getOperand(0);
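         // For right shifts the promoted source must be extended so that the
         // bits shifted in by the wider shift match the original result:
         // sign-extend for SRA, zero-extend for SRL.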
1605     if (Opc == ISD::SRA)
1606       N0 = SExtPromoteOperand(N0, PVT);
1607     else if (Opc == ISD::SRL)
1608       N0 = ZExtPromoteOperand(N0, PVT);
1609     else
1610       N0 = PromoteOperand(N0, PVT, Replace);
1611 
1612     if (!N0.getNode())
1613       return SDValue();
1614 
1615     SDLoc DL(Op);
1616     SDValue N1 = Op.getOperand(1);
1617     SDValue RV =
1618         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1619 
1620     if (Replace)
1621       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1622 
1623     // Deal with Op being deleted.
1624     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1625       return RV;
1626   }
1627   return SDValue();
1628 }
1629 
1630 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1631   if (!LegalOperations)
1632     return SDValue();
1633 
1634   EVT VT = Op.getValueType();
1635   if (VT.isVector() || !VT.isInteger())
1636     return SDValue();
1637 
1638   // If operation type is 'undesirable', e.g. i16 on x86, consider
1639   // promoting it.
1640   unsigned Opc = Op.getOpcode();
1641   if (TLI.isTypeDesirableForOp(Opc, VT))
1642     return SDValue();
1643 
1644   EVT PVT = VT;
1645   // Ask the target whether it is a good idea to promote this operation and,
1646   // if so, what type to promote it to.
1647   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1648     assert(PVT != VT && "Don't know what type to promote to!");
1649     // fold (aext (aext x)) -> (aext x)
1650     // fold (aext (zext x)) -> (zext x)
1651     // fold (aext (sext x)) -> (sext x)
1652     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1653     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1654   }
1655   return SDValue();
1656 }
1657 
1658 bool DAGCombiner::PromoteLoad(SDValue Op) {
1659   if (!LegalOperations)
1660     return false;
1661 
1662   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1663     return false;
1664 
1665   EVT VT = Op.getValueType();
1666   if (VT.isVector() || !VT.isInteger())
1667     return false;
1668 
1669   // If operation type is 'undesirable', e.g. i16 on x86, consider
1670   // promoting it.
1671   unsigned Opc = Op.getOpcode();
1672   if (TLI.isTypeDesirableForOp(Opc, VT))
1673     return false;
1674 
1675   EVT PVT = VT;
1676   // Ask the target whether it is a good idea to promote this operation and,
1677   // if so, what type to promote it to.
1678   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1679     assert(PVT != VT && "Don't know what type to promote to!");
1680 
1681     SDLoc DL(Op);
1682     SDNode *N = Op.getNode();
1683     LoadSDNode *LD = cast<LoadSDNode>(N);
1684     EVT MemVT = LD->getMemoryVT();
1685     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1686                                                       : LD->getExtensionType();
1687     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1688                                    LD->getChain(), LD->getBasePtr(),
1689                                    MemVT, LD->getMemOperand());
1690     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1691 
1692     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1693                Result.dump(&DAG); dbgs() << '\n');
1694 
1695     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1696     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1697 
1698     AddToWorklist(Result.getNode());
1699     recursivelyDeleteUnusedNodes(N);
1700     return true;
1701   }
1702 
1703   return false;
1704 }
1705 
1706 /// Recursively delete a node which has no uses and any operands for
1707 /// which it is the only use.
1708 ///
1709 /// Note that this both deletes the nodes and removes them from the worklist.
1710 /// It also adds any nodes that have had a user deleted to the worklist, as
1711 /// they may now have only one use and be subject to other combines.
1712 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1713   if (!N->use_empty())
1714     return false;
1715 
1716   SmallSetVector<SDNode *, 16> Nodes;
1717   Nodes.insert(N);
1718   do {
1719     N = Nodes.pop_back_val();
1720     if (!N)
1721       continue;
1722 
1723     if (N->use_empty()) {
1724       for (const SDValue &ChildN : N->op_values())
1725         Nodes.insert(ChildN.getNode());
1726 
1727       removeFromWorklist(N);
1728       DAG.DeleteNode(N);
1729     } else {
1730       AddToWorklist(N);
1731     }
1732   } while (!Nodes.empty());
1733   return true;
1734 }
1735 
1736 //===----------------------------------------------------------------------===//
1737 //  Main DAG Combiner implementation
1738 //===----------------------------------------------------------------------===//
1739 
1740 void DAGCombiner::Run(CombineLevel AtLevel) {
1741   // Set the instance variables, so that the various visit routines may use them.
1742   Level = AtLevel;
1743   LegalDAG = Level >= AfterLegalizeDAG;
1744   LegalOperations = Level >= AfterLegalizeVectorOps;
1745   LegalTypes = Level >= AfterLegalizeTypes;
1746 
1747   WorklistInserter AddNodes(*this);
1748 
1749   // Add all the dag nodes to the worklist.
1750   //
1751   // Note: Not all nodes are added to the PruningList here. The only nodes
1752   // that can be deleted are those which have no uses, and all other nodes
1753   // that would otherwise be added to the worklist by the first call to
1754   // getNextWorklistEntry are already present in it.
1755   for (SDNode &Node : DAG.allnodes())
1756     AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1757 
1758   // Create a dummy node (which is not added to allnodes), that adds a reference
1759   // to the root node, preventing it from being deleted, and tracking any
1760   // changes of the root.
1761   HandleSDNode Dummy(DAG.getRoot());
1762 
1763   // While we have a valid worklist entry node, try to combine it.
1764   while (SDNode *N = getNextWorklistEntry()) {
1765     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1766     // N is deleted from the DAG, since they too may now be dead or may have a
1767     // reduced number of uses, allowing other xforms.
1768     if (recursivelyDeleteUnusedNodes(N))
1769       continue;
1770 
1771     WorklistRemover DeadNodes(*this);
1772 
1773     // If this combine is running after legalizing the DAG, re-legalize any
1774     // nodes pulled off the worklist.
1775     if (LegalDAG) {
1776       SmallSetVector<SDNode *, 16> UpdatedNodes;
1777       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1778 
1779       for (SDNode *LN : UpdatedNodes)
1780         AddToWorklistWithUsers(LN);
1781 
1782       if (!NIsValid)
1783         continue;
1784     }
1785 
1786     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1787 
1788     // Add any operands of the new node which have not yet been combined to the
1789     // worklist as well. getNextWorklistEntry flags nodes that have been
1790     // combined before. Because the worklist uniques things already, this won't
1791     // repeatedly process the same operand.
1792     for (const SDValue &ChildN : N->op_values())
1793       AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1794                     /*SkipIfCombinedBefore=*/true);
1795 
1796     SDValue RV = combine(N);
1797 
1798     if (!RV.getNode())
1799       continue;
1800 
1801     ++NodesCombined;
1802 
1803     // Invalidate cached info.
1804     ChainsWithoutMergeableStores.clear();
1805 
1806     // If we get back the same node we passed in, rather than a new node or
1807     // zero, we know that the node must have defined multiple values and
1808     // CombineTo was used.  Since CombineTo takes care of the worklist
1809     // mechanics for us, we have no work to do in this case.
1810     if (RV.getNode() == N)
1811       continue;
1812 
1813     assert(N->getOpcode() != ISD::DELETED_NODE &&
1814            RV.getOpcode() != ISD::DELETED_NODE &&
1815            "Node was deleted but visit returned new node!");
1816 
1817     LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1818 
1819     if (N->getNumValues() == RV->getNumValues())
1820       DAG.ReplaceAllUsesWith(N, RV.getNode());
1821     else {
1822       assert(N->getValueType(0) == RV.getValueType() &&
1823              N->getNumValues() == 1 && "Type mismatch");
1824       DAG.ReplaceAllUsesWith(N, &RV);
1825     }
1826 
1827     // Push the new node and any users onto the worklist.  Omit this if the
1828     // new node is the EntryToken (e.g. if a store managed to get optimized
1829     // out), because re-visiting the EntryToken and its users will not uncover
1830     // any additional opportunities, but there may be a large number of such
1831     // users, potentially causing compile time explosion.
1832     if (RV.getOpcode() != ISD::EntryToken)
1833       AddToWorklistWithUsers(RV.getNode());
1834 
1835     // Finally, if the node is now dead, remove it from the graph.  The node
1836     // may not be dead if the replacement process recursively simplified to
1837     // something else needing this node. This will also take care of adding any
1838     // operands which have lost a user to the worklist.
1839     recursivelyDeleteUnusedNodes(N);
1840   }
1841 
1842   // If the root changed (e.g. it was a dead load), update the root.
1843   DAG.setRoot(Dummy.getValue());
1844   DAG.RemoveDeadNodes();
1845 }
1846 
1847 SDValue DAGCombiner::visit(SDNode *N) {
1848   // clang-format off
1849   switch (N->getOpcode()) {
1850   default: break;
1851   case ISD::TokenFactor:        return visitTokenFactor(N);
1852   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1853   case ISD::ADD:                return visitADD(N);
1854   case ISD::SUB:                return visitSUB(N);
1855   case ISD::SADDSAT:
1856   case ISD::UADDSAT:            return visitADDSAT(N);
1857   case ISD::SSUBSAT:
1858   case ISD::USUBSAT:            return visitSUBSAT(N);
1859   case ISD::ADDC:               return visitADDC(N);
1860   case ISD::SADDO:
1861   case ISD::UADDO:              return visitADDO(N);
1862   case ISD::SUBC:               return visitSUBC(N);
1863   case ISD::SSUBO:
1864   case ISD::USUBO:              return visitSUBO(N);
1865   case ISD::ADDE:               return visitADDE(N);
1866   case ISD::UADDO_CARRY:        return visitUADDO_CARRY(N);
1867   case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
1868   case ISD::SUBE:               return visitSUBE(N);
1869   case ISD::USUBO_CARRY:        return visitUSUBO_CARRY(N);
1870   case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
1871   case ISD::SMULFIX:
1872   case ISD::SMULFIXSAT:
1873   case ISD::UMULFIX:
1874   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1875   case ISD::MUL:                return visitMUL<EmptyMatchContext>(N);
1876   case ISD::SDIV:               return visitSDIV(N);
1877   case ISD::UDIV:               return visitUDIV(N);
1878   case ISD::SREM:
1879   case ISD::UREM:               return visitREM(N);
1880   case ISD::MULHU:              return visitMULHU(N);
1881   case ISD::MULHS:              return visitMULHS(N);
1882   case ISD::AVGFLOORS:
1883   case ISD::AVGFLOORU:
1884   case ISD::AVGCEILS:
1885   case ISD::AVGCEILU:           return visitAVG(N);
1886   case ISD::ABDS:
1887   case ISD::ABDU:               return visitABD(N);
1888   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1889   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1890   case ISD::SMULO:
1891   case ISD::UMULO:              return visitMULO(N);
1892   case ISD::SMIN:
1893   case ISD::SMAX:
1894   case ISD::UMIN:
1895   case ISD::UMAX:               return visitIMINMAX(N);
1896   case ISD::AND:                return visitAND(N);
1897   case ISD::OR:                 return visitOR(N);
1898   case ISD::XOR:                return visitXOR(N);
1899   case ISD::SHL:                return visitSHL(N);
1900   case ISD::SRA:                return visitSRA(N);
1901   case ISD::SRL:                return visitSRL(N);
1902   case ISD::ROTR:
1903   case ISD::ROTL:               return visitRotate(N);
1904   case ISD::FSHL:
1905   case ISD::FSHR:               return visitFunnelShift(N);
1906   case ISD::SSHLSAT:
1907   case ISD::USHLSAT:            return visitSHLSAT(N);
1908   case ISD::ABS:                return visitABS(N);
1909   case ISD::BSWAP:              return visitBSWAP(N);
1910   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1911   case ISD::CTLZ:               return visitCTLZ(N);
1912   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1913   case ISD::CTTZ:               return visitCTTZ(N);
1914   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1915   case ISD::CTPOP:              return visitCTPOP(N);
1916   case ISD::SELECT:             return visitSELECT(N);
1917   case ISD::VSELECT:            return visitVSELECT(N);
1918   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1919   case ISD::SETCC:              return visitSETCC(N);
1920   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1921   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1922   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1923   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1924   case ISD::AssertSext:
1925   case ISD::AssertZext:         return visitAssertExt(N);
1926   case ISD::AssertAlign:        return visitAssertAlign(N);
1927   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1928   case ISD::SIGN_EXTEND_VECTOR_INREG:
1929   case ISD::ZERO_EXTEND_VECTOR_INREG:
1930   case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1931   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1932   case ISD::TRUNCATE_USAT_U:    return visitTRUNCATE_USAT_U(N);
1933   case ISD::BITCAST:            return visitBITCAST(N);
1934   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1935   case ISD::FADD:               return visitFADD(N);
1936   case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
1937   case ISD::FSUB:               return visitFSUB(N);
1938   case ISD::FMUL:               return visitFMUL(N);
1939   case ISD::FMA:                return visitFMA<EmptyMatchContext>(N);
1940   case ISD::FMAD:               return visitFMAD(N);
1941   case ISD::FDIV:               return visitFDIV(N);
1942   case ISD::FREM:               return visitFREM(N);
1943   case ISD::FSQRT:              return visitFSQRT(N);
1944   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1945   case ISD::FPOW:               return visitFPOW(N);
1946   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1947   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1948   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1949   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1950   case ISD::LROUND:
1951   case ISD::LLROUND:
1952   case ISD::LRINT:
1953   case ISD::LLRINT:             return visitXROUND(N);
1954   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1955   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1956   case ISD::FNEG:               return visitFNEG(N);
1957   case ISD::FABS:               return visitFABS(N);
1958   case ISD::FFLOOR:             return visitFFLOOR(N);
1959   case ISD::FMINNUM:
1960   case ISD::FMAXNUM:
1961   case ISD::FMINIMUM:
1962   case ISD::FMAXIMUM:
1963   case ISD::FMINIMUMNUM:
1964   case ISD::FMAXIMUMNUM:       return visitFMinMax(N);
1965   case ISD::FCEIL:              return visitFCEIL(N);
1966   case ISD::FTRUNC:             return visitFTRUNC(N);
1967   case ISD::FFREXP:             return visitFFREXP(N);
1968   case ISD::BRCOND:             return visitBRCOND(N);
1969   case ISD::BR_CC:              return visitBR_CC(N);
1970   case ISD::LOAD:               return visitLOAD(N);
1971   case ISD::STORE:              return visitSTORE(N);
1972   case ISD::ATOMIC_STORE:       return visitATOMIC_STORE(N);
1973   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1974   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1975   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1976   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1977   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1978   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1979   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1980   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1981   case ISD::MGATHER:            return visitMGATHER(N);
1982   case ISD::MLOAD:              return visitMLOAD(N);
1983   case ISD::MSCATTER:           return visitMSCATTER(N);
1984   case ISD::MSTORE:             return visitMSTORE(N);
1985   case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
1986   case ISD::VECTOR_COMPRESS:    return visitVECTOR_COMPRESS(N);
1987   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1988   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1989   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1990   case ISD::FP_TO_BF16:         return visitFP_TO_BF16(N);
1991   case ISD::BF16_TO_FP:         return visitBF16_TO_FP(N);
1992   case ISD::FREEZE:             return visitFREEZE(N);
1993   case ISD::GET_FPENV_MEM:      return visitGET_FPENV_MEM(N);
1994   case ISD::SET_FPENV_MEM:      return visitSET_FPENV_MEM(N);
1995   case ISD::FCANONICALIZE:      return visitFCANONICALIZE(N);
1996   case ISD::VECREDUCE_FADD:
1997   case ISD::VECREDUCE_FMUL:
1998   case ISD::VECREDUCE_ADD:
1999   case ISD::VECREDUCE_MUL:
2000   case ISD::VECREDUCE_AND:
2001   case ISD::VECREDUCE_OR:
2002   case ISD::VECREDUCE_XOR:
2003   case ISD::VECREDUCE_SMAX:
2004   case ISD::VECREDUCE_SMIN:
2005   case ISD::VECREDUCE_UMAX:
2006   case ISD::VECREDUCE_UMIN:
2007   case ISD::VECREDUCE_FMAX:
2008   case ISD::VECREDUCE_FMIN:
2009   case ISD::VECREDUCE_FMAXIMUM:
2010   case ISD::VECREDUCE_FMINIMUM:     return visitVECREDUCE(N);
2011 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2012 #include "llvm/IR/VPIntrinsics.def"
2013     return visitVPOp(N);
2014   }
2015   // clang-format on
2016   return SDValue();
2017 }
2018 
2019 SDValue DAGCombiner::combine(SDNode *N) {
2020   if (!DebugCounter::shouldExecute(DAGCombineCounter))
2021     return SDValue();
2022 
2023   SDValue RV;
2024   if (!DisableGenericCombines)
2025     RV = visit(N);
2026 
2027   // If nothing happened, try a target-specific DAG combine.
2028   if (!RV.getNode()) {
2029     assert(N->getOpcode() != ISD::DELETED_NODE &&
2030            "Node was deleted but visit returned NULL!");
2031 
2032     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2033         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2034 
2035       // Expose the DAG combiner to the target combiner impls.
2036       TargetLowering::DAGCombinerInfo
2037         DagCombineInfo(DAG, Level, false, this);
2038 
2039       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2040     }
2041   }
2042 
2043   // If nothing happened still, try promoting the operation.
2044   if (!RV.getNode()) {
2045     switch (N->getOpcode()) {
2046     default: break;
2047     case ISD::ADD:
2048     case ISD::SUB:
2049     case ISD::MUL:
2050     case ISD::AND:
2051     case ISD::OR:
2052     case ISD::XOR:
2053       RV = PromoteIntBinOp(SDValue(N, 0));
2054       break;
2055     case ISD::SHL:
2056     case ISD::SRA:
2057     case ISD::SRL:
2058       RV = PromoteIntShiftOp(SDValue(N, 0));
2059       break;
2060     case ISD::SIGN_EXTEND:
2061     case ISD::ZERO_EXTEND:
2062     case ISD::ANY_EXTEND:
2063       RV = PromoteExtend(SDValue(N, 0));
2064       break;
2065     case ISD::LOAD:
2066       if (PromoteLoad(SDValue(N, 0)))
2067         RV = SDValue(N, 0);
2068       break;
2069     }
2070   }
2071 
2072   // If N is a commutative binary node, try to eliminate it if the commuted
2073   // version is already present in the DAG.
2074   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2075     SDValue N0 = N->getOperand(0);
2076     SDValue N1 = N->getOperand(1);
2077 
2078     // Constant operands are canonicalized to RHS.
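         // Only try the swap when the commuted form would itself be canonical,
         // i.e. when it would not move a lone constant to the LHS.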
2079     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2080       SDValue Ops[] = {N1, N0};
2081       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2082                                             N->getFlags());
2083       if (CSENode)
2084         return SDValue(CSENode, 0);
2085     }
2086   }
2087 
2088   return RV;
2089 }
2090 
2091 /// Given a node, return its input chain if it has one, otherwise return a null
2092 /// SDValue.
2093 static SDValue getInputChainForNode(SDNode *N) {
2094   if (unsigned NumOps = N->getNumOperands()) {
2095     if (N->getOperand(0).getValueType() == MVT::Other)
2096       return N->getOperand(0);
2097     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2098       return N->getOperand(NumOps-1);
2099     for (unsigned i = 1; i < NumOps-1; ++i)
2100       if (N->getOperand(i).getValueType() == MVT::Other)
2101         return N->getOperand(i);
2102   }
2103   return SDValue();
2104 }
2105 
2106 SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2107   SDValue Operand = N->getOperand(0);
2108   EVT VT = Operand.getValueType();
2109   SDLoc dl(N);
2110 
2111   // Canonicalize undef to quiet NaN.
2112   if (Operand.isUndef()) {
2113     APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2114     return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2115   }
2116   return SDValue();
2117 }
2118 
2119 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2120   // If N has two operands, where one has an input chain equal to the other,
2121   // the 'other' chain is redundant.
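       // e.g. TokenFactor(Ld.chain, Ch) -> Ld.chain when Ld's input chain is Ch.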
2122   if (N->getNumOperands() == 2) {
2123     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2124       return N->getOperand(0);
2125     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2126       return N->getOperand(1);
2127   }
2128 
2129   // Don't simplify token factors if optnone.
2130   if (OptLevel == CodeGenOptLevel::None)
2131     return SDValue();
2132 
2133   // Don't simplify the token factor if the node itself has too many operands.
2134   if (N->getNumOperands() > TokenFactorInlineLimit)
2135     return SDValue();
2136 
2137   // If the sole user is a token factor, we should make sure we have a
2138   // chance to merge them together. This prevents TF chains from inhibiting
2139   // optimizations.
2140   if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2141     AddToWorklist(*(N->user_begin()));
2142 
2143   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
2144   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
2145   SmallPtrSet<SDNode*, 16> SeenOps;
2146   bool Changed = false;             // If we should replace this token factor.
2147 
2148   // Start out with this token factor.
2149   TFs.push_back(N);
2150 
2151   // Iterate through token factors. The TFs list grows when new token factors
2152   // are encountered.
2153   for (unsigned i = 0; i < TFs.size(); ++i) {
2154     // Limit number of nodes to inline, to avoid quadratic compile times.
2155     // We have to add the outstanding Token Factors to Ops, otherwise we might
2156     // drop Ops from the resulting Token Factors.
2157     if (Ops.size() > TokenFactorInlineLimit) {
2158       for (unsigned j = i; j < TFs.size(); j++)
2159         Ops.emplace_back(TFs[j], 0);
2160       // Drop unprocessed Token Factors from TFs, so we do not add them to the
2161       // combiner worklist later.
2162       TFs.resize(i);
2163       break;
2164     }
2165 
2166     SDNode *TF = TFs[i];
2167     // Check each of the operands.
2168     for (const SDValue &Op : TF->op_values()) {
2169       switch (Op.getOpcode()) {
2170       case ISD::EntryToken:
2171         // Entry tokens don't need to be added to the list. They are
2172         // redundant.
2173         Changed = true;
2174         break;
2175 
2176       case ISD::TokenFactor:
2177         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2178           // Queue up for processing.
2179           TFs.push_back(Op.getNode());
2180           Changed = true;
2181           break;
2182         }
2183         [[fallthrough]];
2184 
2185       default:
2186         // Only add if it isn't already in the list.
2187         if (SeenOps.insert(Op.getNode()).second)
2188           Ops.push_back(Op);
2189         else
2190           Changed = true;
2191         break;
2192       }
2193     }
2194   }
2195 
2196   // Re-visit inlined Token Factors, to clean them up in case they have been
2197   // removed. Skip the first Token Factor, as this is the current node.
2198   for (unsigned i = 1, e = TFs.size(); i < e; i++)
2199     AddToWorklist(TFs[i]);
2200 
2201   // Remove nodes that are chained to another node in the list. Do so
2202   // by walking up chains breadth-first, stopping when we've seen
2203   // another operand. In general we must climb to the EntryNode, but we can exit
2204   // early if we find all remaining work is associated with just one operand as
2205   // no further pruning is possible.
2206 
2207   // List of nodes to search through and original Ops from which they originate.
2208   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2209   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2210   SmallPtrSet<SDNode *, 16> SeenChains;
2211   bool DidPruneOps = false;
2212 
2213   unsigned NumLeftToConsider = 0;
2214   for (const SDValue &Op : Ops) {
2215     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2216     OpWorkCount.push_back(1);
2217   }
2218 
2219   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2220     // If this is an Op, we can remove the op from the list. Re-mark any
2221     // search associated with it as coming from the current OpNumber.
2222     if (SeenOps.contains(Op)) {
2223       Changed = true;
2224       DidPruneOps = true;
2225       unsigned OrigOpNumber = 0;
2226       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2227         OrigOpNumber++;
2228       assert((OrigOpNumber != Ops.size()) &&
2229              "expected to find TokenFactor Operand");
2230       // Re-mark worklist from OrigOpNumber to OpNumber
2231       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2232         if (Worklist[i].second == OrigOpNumber) {
2233           Worklist[i].second = OpNumber;
2234         }
2235       }
2236       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2237       OpWorkCount[OrigOpNumber] = 0;
2238       NumLeftToConsider--;
2239     }
2240     // Add if it's a new chain
2241     if (SeenChains.insert(Op).second) {
2242       OpWorkCount[OpNumber]++;
2243       Worklist.push_back(std::make_pair(Op, OpNumber));
2244     }
2245   };
2246 
2247   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2248     // We need to consider at least 2 Ops to prune.
2249     if (NumLeftToConsider <= 1)
2250       break;
2251     auto CurNode = Worklist[i].first;
2252     auto CurOpNumber = Worklist[i].second;
2253     assert((OpWorkCount[CurOpNumber] > 0) &&
2254            "Node should not appear in worklist");
2255     switch (CurNode->getOpcode()) {
2256     case ISD::EntryToken:
2257       // Hitting EntryToken is the only way for the search to terminate without
2258       // hitting another operand's search. Prevent us from marking this operand
2259       // considered.
2261       NumLeftToConsider++;
2262       break;
2263     case ISD::TokenFactor:
2264       for (const SDValue &Op : CurNode->op_values())
2265         AddToWorklist(i, Op.getNode(), CurOpNumber);
2266       break;
2267     case ISD::LIFETIME_START:
2268     case ISD::LIFETIME_END:
2269     case ISD::CopyFromReg:
2270     case ISD::CopyToReg:
2271       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2272       break;
2273     default:
2274       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2275         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2276       break;
2277     }
2278     OpWorkCount[CurOpNumber]--;
2279     if (OpWorkCount[CurOpNumber] == 0)
2280       NumLeftToConsider--;
2281   }
2282 
2283   // If we've changed things around then replace token factor.
2284   if (Changed) {
2285     SDValue Result;
2286     if (Ops.empty()) {
2287       // The entry token is the only possible outcome.
2288       Result = DAG.getEntryNode();
2289     } else {
2290       if (DidPruneOps) {
2291         SmallVector<SDValue, 8> PrunedOps;
2293         for (const SDValue &Op : Ops) {
2294           if (SeenChains.count(Op.getNode()) == 0)
2295             PrunedOps.push_back(Op);
2296         }
2297         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2298       } else {
2299         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2300       }
2301     }
2302     return Result;
2303   }
2304   return SDValue();
2305 }
2306 
2307 /// MERGE_VALUES can always be eliminated.
2308 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2309   WorklistRemover DeadNodes(*this);
2310   // Replacing results may cause a different MERGE_VALUES to suddenly
2311   // be CSE'd with N, and carry its uses with it. Iterate until no
2312   // uses remain, to ensure that the node can be safely deleted.
2313   // First add the users of this node to the work list so that they
2314   // can be tried again once they have new operands.
2315   AddUsersToWorklist(N);
2316   do {
2317     // Do as a single replacement to avoid rewalking use lists.
2318     SmallVector<SDValue, 8> Ops(N->ops());
2319     DAG.ReplaceAllUsesWith(N, Ops.data());
2320   } while (!N->use_empty());
2321   deleteAndRecombine(N);
2322   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2323 }
2324 
2325 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2326 /// ConstantSDNode pointer; else return nullptr.
2327 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2328   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2329   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2330 }
2331 
2332 // isTruncateOf - If N is a truncate of some other value, return true and record
2333 // the value being truncated in Op and which of Op's bits are zero/one in Known.
2334 // This function computes KnownBits to avoid a duplicated call to
2335 // computeKnownBits in the caller.
2336 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2337                          KnownBits &Known) {
2338   if (N->getOpcode() == ISD::TRUNCATE) {
2339     Op = N->getOperand(0);
2340     Known = DAG.computeKnownBits(Op);
2341     if (N->getFlags().hasNoUnsignedWrap())
2342       Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2343     return true;
2344   }
2345 
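       // An i1 (setne X, 0) also acts as a truncate to i1 when every bit of X
       // above bit 0 is known to be zero.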
2346   if (N.getValueType().getScalarType() != MVT::i1 ||
2347       !sd_match(
2348           N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2349     return false;
2350 
2351   Known = DAG.computeKnownBits(Op);
2352   return (Known.Zero | 1).isAllOnes();
2353 }
2354 
2355 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2356 /// and that N may be folded into the load / store addressing mode.
2357 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2358                                     const TargetLowering &TLI) {
2359   EVT VT;
2360   unsigned AS;
2361 
2362   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2363     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2364       return false;
2365     VT = LD->getMemoryVT();
2366     AS = LD->getAddressSpace();
2367   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2368     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2369       return false;
2370     VT = ST->getMemoryVT();
2371     AS = ST->getAddressSpace();
2372   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2373     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2374       return false;
2375     VT = LD->getMemoryVT();
2376     AS = LD->getAddressSpace();
2377   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2378     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2379       return false;
2380     VT = ST->getMemoryVT();
2381     AS = ST->getAddressSpace();
2382   } else {
2383     return false;
2384   }
2385 
2386   TargetLowering::AddrMode AM;
2387   if (N->getOpcode() == ISD::ADD) {
2388     AM.HasBaseReg = true;
2389     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2390     if (Offset)
2391       // [reg +/- imm]
2392       AM.BaseOffs = Offset->getSExtValue();
2393     else
2394       // [reg +/- reg]
2395       AM.Scale = 1;
2396   } else if (N->getOpcode() == ISD::SUB) {
2397     AM.HasBaseReg = true;
2398     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2399     if (Offset)
2400       // [reg +/- imm]
2401       AM.BaseOffs = -Offset->getSExtValue();
2402     else
2403       // [reg +/- reg]
2404       AM.Scale = 1;
2405   } else {
2406     return false;
2407   }
2408 
2409   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2410                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2411 }
2412 
2413 /// This inverts a canonicalization in IR that replaces a variable select arm
2414 /// with an identity constant. Codegen improves if we re-use the variable
2415 /// operand rather than load a constant. This can also be converted into a
2416 /// masked vector operation if the target supports it.
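     ///
     /// For example, with an add identity of 0:
     ///   (add X, (vselect Cond, 0, Y)) -> (vselect Cond, X, (add X, Y))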
2417 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2418                                               bool ShouldCommuteOperands) {
2419   // Match a select as operand 1. The identity constant that we are looking for
2420   // is only valid as operand 1 of a non-commutative binop.
2421   SDValue N0 = N->getOperand(0);
2422   SDValue N1 = N->getOperand(1);
2423   if (ShouldCommuteOperands)
2424     std::swap(N0, N1);
2425 
2426   // TODO: Should this apply to scalar select too?
2427   if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2428     return SDValue();
2429 
2430   // We can't hoist all instructions because of immediate UB (not speculatable),
2431   // for example div/rem by zero.
2432   if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2433     return SDValue();
2434 
2435   unsigned Opcode = N->getOpcode();
2436   EVT VT = N->getValueType(0);
2437   SDValue Cond = N1.getOperand(0);
2438   SDValue TVal = N1.getOperand(1);
2439   SDValue FVal = N1.getOperand(2);
2440 
2441   // This transform increases uses of N0, so freeze it to be safe.
2442   // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2443   unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2444   if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2445     SDValue F0 = DAG.getFreeze(N0);
2446     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2447     return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2448   }
2449   // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2450   if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2451     SDValue F0 = DAG.getFreeze(N0);
2452     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2453     return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2454   }
2455 
2456   return SDValue();
2457 }
2458 
2459 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2460   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2461          "Unexpected binary operator");
2462 
2463   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2464   auto BinOpcode = BO->getOpcode();
2465   EVT VT = BO->getValueType(0);
2466   if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2467     if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2468       return Sel;
2469 
2470     if (TLI.isCommutativeBinOp(BO->getOpcode()))
2471       if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2472         return Sel;
2473   }
2474 
2475   // Don't do this unless the old select is going away. We want to eliminate the
2476   // binary operator, not replace a binop with a select.
2477   // TODO: Handle ISD::SELECT_CC.
2478   unsigned SelOpNo = 0;
2479   SDValue Sel = BO->getOperand(0);
2480   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2481     SelOpNo = 1;
2482     Sel = BO->getOperand(1);
2483 
2484     // Peek through trunc to shift amount type.
2485     if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2486          BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2487       // This is valid when the truncated bits of x are already zero.
2488       SDValue Op;
2489       KnownBits Known;
2490       if (isTruncateOf(DAG, Sel, Op, Known) &&
2491           Known.countMaxActiveBits() < Sel.getScalarValueSizeInBits())
2492         Sel = Op;
2493     }
2494   }
2495 
2496   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2497     return SDValue();
2498 
2499   SDValue CT = Sel.getOperand(1);
2500   if (!isConstantOrConstantVector(CT, true) &&
2501       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2502     return SDValue();
2503 
2504   SDValue CF = Sel.getOperand(2);
2505   if (!isConstantOrConstantVector(CF, true) &&
2506       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2507     return SDValue();
2508 
2509   // Bail out if any constants are opaque because we can't constant fold those.
2510   // The exception is "and" and "or" with either 0 or -1 in which case we can
2511   // propagate non-constant operands into the select. I.e.:
2512   // and (select Cond, 0, -1), X --> select Cond, 0, X
2513   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2514   bool CanFoldNonConst =
2515       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2516       ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2517        (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2518 
2519   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2520   if (!CanFoldNonConst &&
2521       !isConstantOrConstantVector(CBO, true) &&
2522       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2523     return SDValue();
2524 
2525   SDLoc DL(Sel);
2526   SDValue NewCT, NewCF;
2527 
2528   if (CanFoldNonConst) {
2529     // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2530     if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2531         (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2532       NewCT = CT;
2533     else
2534       NewCT = CBO;
2535 
2536     if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2537         (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2538       NewCF = CF;
2539     else
2540       NewCF = CBO;
2541   } else {
2542     // We have a select-of-constants followed by a binary operator with a
2543     // constant. Eliminate the binop by pulling the constant math into the
2544     // select. Example:
2545     //   add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2546     NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2547                     : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2548     if (!NewCT)
2549       return SDValue();
2550 
2551     NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2552                     : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2553     if (!NewCF)
2554       return SDValue();
2555   }
2556 
2557   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2558   SelectOp->setFlags(BO->getFlags());
2559   return SelectOp;
2560 }
2561 
2562 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2563                                          SelectionDAG &DAG) {
2564   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2565          "Expecting add or sub");
2566 
2567   // Match a constant operand and a zext operand for the math instruction:
2568   // add Z, C
2569   // sub C, Z
2570   bool IsAdd = N->getOpcode() == ISD::ADD;
2571   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2572   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2573   auto *CN = dyn_cast<ConstantSDNode>(C);
2574   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2575     return SDValue();
2576 
2577   // Match the zext operand as a setcc of a boolean.
2578   if (Z.getOperand(0).getValueType() != MVT::i1)
2579     return SDValue();
2580 
2581   // Match the compare as: setcc (X & 1), 0, eq.
2582   if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2583                                          m_SpecificCondCode(ISD::SETEQ))))
2584     return SDValue();
2585 
2586   // We are adding/subtracting a constant and an inverted low bit. Turn that
2587   // into a subtract/add of the low bit with incremented/decremented constant:
2588   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2589   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
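       // A quick sanity check of the equivalence, e.g. with C == 41: if X is
       // even, (X & 1) == 0, the seteq is true, and both forms produce 42; if X
       // is odd, both forms produce 41.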
2590   EVT VT = C.getValueType();
2591   SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2592   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2593                      : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2594   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2595 }
2596 
2597 // Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
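     // For reference, the identities behind this fold: A + B == 2*(A & B) + (A ^ B)
     // and A | B == (A & B) + (A ^ B), together with x - (x >> 1) == ceil(x / 2),
     // give (A | B) - ((A ^ B) >> 1) == ceil((A + B) / 2) without overflowing the
     // element type. E.g. for i8 A = 5, B = 6: 7 - (3 >> 1) == 6 == ceil(11 / 2).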
2598 SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2599   SDValue N0 = N->getOperand(0);
2600   EVT VT = N0.getValueType();
2601   SDValue A, B;
2602 
2603   if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2604       sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2605                         m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2606     return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2607   }
2608   if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2609       sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2610                         m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2611     return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2612   }
2613   return SDValue();
2614 }
2615 
2616 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2617 /// a shift and add with a different constant.
2618 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2619                                    SelectionDAG &DAG) {
2620   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2621          "Expecting add or sub");
2622 
2623   // We need a constant operand for the add/sub, and the other operand is a
2624   // logical shift right: add (srl), C or sub C, (srl).
2625   bool IsAdd = N->getOpcode() == ISD::ADD;
2626   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2627   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2628   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2629       ShiftOp.getOpcode() != ISD::SRL)
2630     return SDValue();
2631 
2632   // The shift must be of a 'not' value.
2633   SDValue Not = ShiftOp.getOperand(0);
2634   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2635     return SDValue();
2636 
2637   // The shift must be moving the sign bit to the least significant bit.
2638   EVT VT = ShiftOp.getValueType();
2639   SDValue ShAmt = ShiftOp.getOperand(1);
2640   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2641   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2642     return SDValue();
2643 
2644   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2645   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2646   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
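       // This relies on (srl (not X), BW-1) == (sra X, BW-1) + 1: the left-hand
       // side is 1 when X is non-negative and 0 when X is negative, while
       // (sra X, BW-1) is 0 and -1 respectively.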
2647   if (SDValue NewC = DAG.FoldConstantArithmetic(
2648           IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2649           {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2650     SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2651                                    Not.getOperand(0), ShAmt);
2652     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2653   }
2654 
2655   return SDValue();
2656 }
2657 
2658 static bool
2659 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2660   return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2661          (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2662 }
2663 
2664 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2665 /// an ISD::ADD here; it could, for example, be an ISD::OR if we know that
2666 /// there are no common bits set in the operands).
2667 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2668   SDValue N0 = N->getOperand(0);
2669   SDValue N1 = N->getOperand(1);
2670   EVT VT = N0.getValueType();
2671   SDLoc DL(N);
2672 
2673   // fold (add x, undef) -> undef
2674   if (N0.isUndef())
2675     return N0;
2676   if (N1.isUndef())
2677     return N1;
2678 
2679   // fold (add c1, c2) -> c1+c2
2680   if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2681     return C;
2682 
2683   // canonicalize constant to RHS
2684   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2685       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2686     return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2687 
2688   if (areBitwiseNotOfEachother(N0, N1))
2689     return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2690 
2691   // fold vector ops
2692   if (VT.isVector()) {
2693     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2694       return FoldedVOp;
2695 
2696     // fold (add x, 0) -> x, vector edition
2697     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2698       return N0;
2699   }
2700 
2701   // fold (add x, 0) -> x
2702   if (isNullConstant(N1))
2703     return N0;
2704 
2705   if (N0.getOpcode() == ISD::SUB) {
2706     SDValue N00 = N0.getOperand(0);
2707     SDValue N01 = N0.getOperand(1);
2708 
2709     // fold ((A-c1)+c2) -> (A+(c2-c1))
2710     if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2711       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2712 
2713     // fold ((c1-A)+c2) -> (c1+c2)-A
2714     if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2715       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2716   }
2717 
2718   // add (sext i1 X), 1 -> zext (not i1 X)
2719   // We don't transform this pattern:
2720   //   add (zext i1 X), -1 -> sext (not i1 X)
2721   // because most (?) targets generate better code for the zext form.
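       // Both sides agree for each boolean input: X == 0 gives sext 0, and
       // 0 + 1 == 1 == zext(not 0); X == 1 gives sext -1, and -1 + 1 == 0 ==
       // zext(not 1).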
2722   if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2723       isOneOrOneSplat(N1)) {
2724     SDValue X = N0.getOperand(0);
2725     if ((!LegalOperations ||
2726          (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2727           TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2728         X.getScalarValueSizeInBits() == 1) {
2729       SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2730       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2731     }
2732   }
2733 
2734   // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2735   // iff (or x, c0) is equivalent to (add x, c0).
2736   // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2737   // iff (xor x, c0) is equivalent to (add x, c0).
2738   if (DAG.isADDLike(N0)) {
2739     SDValue N01 = N0.getOperand(1);
2740     if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2741       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2742   }
2743 
2744   if (SDValue NewSel = foldBinOpIntoSelect(N))
2745     return NewSel;
2746 
2747   // reassociate add
2748   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2749     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2750       return RADD;
2751 
2752     // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
2753     // equivalent to (add x, c).
2754     // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
2755     // equivalent to (add x, c).
2756     // Do this optimization only when adding c does not introduce instructions
2757     // for adding carries.
2758     auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2759       if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2760           isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2761         // If N0's type will not be split, or its constant is a sign mask,
2762         // the transform does not introduce an add carry.
2763         auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2764         bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2765                           TyActn == TargetLoweringBase::TypePromoteInteger ||
2766                           isMinSignedConstant(N0.getOperand(1));
2767         if (NoAddCarry)
2768           return DAG.getNode(
2769               ISD::ADD, DL, VT,
2770               DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2771               N0.getOperand(1));
2772       }
2773       return SDValue();
2774     };
2775     if (SDValue Add = ReassociateAddOr(N0, N1))
2776       return Add;
2777     if (SDValue Add = ReassociateAddOr(N1, N0))
2778       return Add;
2779 
2780     // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2781     if (SDValue SD =
2782             reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2783       return SD;
2784   }
2785 
2786   SDValue A, B, C, D;
2787 
2788   // fold ((0-A) + B) -> B-A
2789   if (sd_match(N0, m_Neg(m_Value(A))))
2790     return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2791 
2792   // fold (A + (0-B)) -> A-B
2793   if (sd_match(N1, m_Neg(m_Value(B))))
2794     return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2795 
2796   // fold (A+(B-A)) -> B
2797   if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2798     return B;
2799 
2800   // fold ((B-A)+A) -> B
2801   if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2802     return B;
2803 
2804   // fold ((A-B)+(C-A)) -> (C-B)
2805   if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2806       sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2807     return DAG.getNode(ISD::SUB, DL, VT, C, B);
2808 
2809   // fold ((A-B)+(B-C)) -> (A-C)
2810   if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2811       sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2812     return DAG.getNode(ISD::SUB, DL, VT, A, C);
2813 
2814   // fold (A+(B-(A+C))) to (B-C)
2815   // fold (A+(B-(C+A))) to (B-C)
2816   if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2817     return DAG.getNode(ISD::SUB, DL, VT, B, C);
2818 
2819   // fold (A+((B-A)+or-C)) to (B+or-C)
2820   if (sd_match(N1,
2821                m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2822                        m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2823     return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2824 
2825   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2826   if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2827       sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2828       (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2829     return DAG.getNode(ISD::SUB, DL, VT,
2830                        DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2831                        DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2832 
2833   // fold (add (umax X, C), -C) --> (usubsat X, C)
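       // E.g. for i8 with C == 3: X == 5 gives umax(5, 3) + (-3) == 2 ==
       // usubsat(5, 3), and X == 1 gives umax(1, 3) + (-3) == 0 == usubsat(1, 3).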
2834   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2835     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2836       return (!Max && !Op) ||
2837              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2838     };
2839     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2840                                   /*AllowUndefs*/ true))
2841       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2842                          N0.getOperand(1));
2843   }
2844 
2845   if (SimplifyDemandedBits(SDValue(N, 0)))
2846     return SDValue(N, 0);
2847 
2848   if (isOneOrOneSplat(N1)) {
2849     // fold (add (xor a, -1), 1) -> (sub 0, a)
2850     if (isBitwiseNot(N0))
2851       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2852                          N0.getOperand(0));
2853 
2854     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2855     if (N0.getOpcode() == ISD::ADD) {
2856       SDValue A, Xor;
2857 
2858       if (isBitwiseNot(N0.getOperand(0))) {
2859         A = N0.getOperand(1);
2860         Xor = N0.getOperand(0);
2861       } else if (isBitwiseNot(N0.getOperand(1))) {
2862         A = N0.getOperand(0);
2863         Xor = N0.getOperand(1);
2864       }
2865 
2866       if (Xor)
2867         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2868     }
2869 
2870     // Look for:
2871     //   add (add x, y), 1
2872     // If the target does not prefer this form, then turn it into:
2873     //   sub y, (xor x, -1)
2874     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2875         N0.hasOneUse() &&
2876         // Limit this to after legalization if the add has wrap flags
2877         (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2878                                        !N->getFlags().hasNoSignedWrap()))) {
2879       SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2880       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2881     }
2882   }
2883 
2884   // (x - y) + -1  ->  add (xor y, -1), x
2885   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2886       isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2887     SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2888     return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2889   }
2890 
2891   // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2892   // This can help if the inner add has multiple uses.
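       // Distributing the multiply shows the equivalence:
       //   (A + CA) * CM + CB == A * CM + (CM * CA + CB)
       // e.g. with CA == 1, CM == 3, CB == 2: (A + 1) * 3 + 2 == A * 3 + 5.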
2893   APInt CM, CA;
2894   if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2895     if (VT.getScalarSizeInBits() <= 64) {
2896       if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2897                                       m_ConstInt(CM)))) &&
2898           TLI.isLegalAddImmediate(
2899               (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2900         SDNodeFlags Flags;
2901         // If all the inputs are nuw, the outputs can be nuw. If all the
2902         // inputs are _also_ nsw, the outputs can be too.
2903         if (N->getFlags().hasNoUnsignedWrap() &&
2904             N0->getFlags().hasNoUnsignedWrap() &&
2905             N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2906           Flags |= SDNodeFlags::NoUnsignedWrap;
2907           if (N->getFlags().hasNoSignedWrap() &&
2908               N0->getFlags().hasNoSignedWrap() &&
2909               N0.getOperand(0)->getFlags().hasNoSignedWrap())
2910             Flags |= SDNodeFlags::NoSignedWrap;
2911         }
2912         SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2913                                   DAG.getConstant(CM, DL, VT), Flags);
2914         return DAG.getNode(
2915             ISD::ADD, DL, VT, Mul,
2916             DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2917       }
2918       // Also handle the case where there is an intermediate add.
2919       if (sd_match(N0, m_OneUse(m_Add(
2920                            m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2921                                           m_ConstInt(CM))),
2922                            m_Value(B)))) &&
2923           TLI.isLegalAddImmediate(
2924               (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2925         SDNodeFlags Flags;
2926         // If all the inputs are nuw, the outputs can be nuw. If all the
2927         // inputs are _also_ nsw, the outputs can be too.
2928         SDValue OMul =
2929             N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2930         if (N->getFlags().hasNoUnsignedWrap() &&
2931             N0->getFlags().hasNoUnsignedWrap() &&
2932             OMul->getFlags().hasNoUnsignedWrap() &&
2933             OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2934           Flags |= SDNodeFlags::NoUnsignedWrap;
2935           if (N->getFlags().hasNoSignedWrap() &&
2936               N0->getFlags().hasNoSignedWrap() &&
2937               OMul->getFlags().hasNoSignedWrap() &&
2938               OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2939             Flags |= SDNodeFlags::NoSignedWrap;
2940         }
2941         SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2942                                   DAG.getConstant(CM, DL, VT), Flags);
2943         SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2944         return DAG.getNode(
2945             ISD::ADD, DL, VT, Add,
2946             DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2947       }
2948     }
2949   }
2950 
2951   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2952     return Combined;
2953 
2954   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2955     return Combined;
2956 
2957   return SDValue();
2958 }
2959 
2960 // Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
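     // For reference: A + B == 2*(A & B) + (A ^ B), so (A & B) + ((A ^ B) >> 1)
     // == floor((A + B) / 2) without overflowing the element type. E.g. for i8
     // A = 5, B = 6: (5 & 6) + ((5 ^ 6) >> 1) == 4 + 1 == 5 == floor(11 / 2).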
2961 SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2962   SDValue N0 = N->getOperand(0);
2963   EVT VT = N0.getValueType();
2964   SDValue A, B;
2965 
2966   if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2967       sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2968                         m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2969     return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2970   }
2971   if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2972       sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2973                         m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2974     return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2975   }
2976 
2977   return SDValue();
2978 }
2979 
2980 SDValue DAGCombiner::visitADD(SDNode *N) {
2981   SDValue N0 = N->getOperand(0);
2982   SDValue N1 = N->getOperand(1);
2983   EVT VT = N0.getValueType();
2984   SDLoc DL(N);
2985 
2986   if (SDValue Combined = visitADDLike(N))
2987     return Combined;
2988 
2989   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2990     return V;
2991 
2992   if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2993     return V;
2994 
2995   // Try to match the AVGFLOOR fixed-width pattern.
2996   if (SDValue V = foldAddToAvg(N, DL))
2997     return V;
2998 
2999   // fold (a+b) -> (a|b) iff a and b share no bits.
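       // With no common bits set, no bit position produces a carry, e.g.
       // 0b0101 + 0b1010 == 0b1111 == 0b0101 | 0b1010.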
3000   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3001       DAG.haveNoCommonBitsSet(N0, N1))
3002     return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3003 
3004   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3005   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3006     const APInt &C0 = N0->getConstantOperandAPInt(0);
3007     const APInt &C1 = N1->getConstantOperandAPInt(0);
3008     return DAG.getVScale(DL, VT, C0 + C1);
3009   }
3010 
3011   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3012   if (N0.getOpcode() == ISD::ADD &&
3013       N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3014       N1.getOpcode() == ISD::VSCALE) {
3015     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3016     const APInt &VS1 = N1->getConstantOperandAPInt(0);
3017     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3018     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3019   }
3020 
3021   // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2).
3022   if (N0.getOpcode() == ISD::STEP_VECTOR &&
3023       N1.getOpcode() == ISD::STEP_VECTOR) {
3024     const APInt &C0 = N0->getConstantOperandAPInt(0);
3025     const APInt &C1 = N1->getConstantOperandAPInt(0);
3026     APInt NewStep = C0 + C1;
3027     return DAG.getStepVector(DL, VT, NewStep);
3028   }
3029 
3030   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3031   if (N0.getOpcode() == ISD::ADD &&
3032       N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3033       N1.getOpcode() == ISD::STEP_VECTOR) {
3034     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3035     const APInt &SV1 = N1->getConstantOperandAPInt(0);
3036     APInt NewStep = SV0 + SV1;
3037     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3038     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3039   }
3040 
3041   return SDValue();
3042 }
3043 
3044 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3045   unsigned Opcode = N->getOpcode();
3046   SDValue N0 = N->getOperand(0);
3047   SDValue N1 = N->getOperand(1);
3048   EVT VT = N0.getValueType();
3049   bool IsSigned = Opcode == ISD::SADDSAT;
3050   SDLoc DL(N);
3051 
3052   // fold (add_sat x, undef) -> -1
3053   if (N0.isUndef() || N1.isUndef())
3054     return DAG.getAllOnesConstant(DL, VT);
3055 
3056   // fold (add_sat c1, c2) -> c3
3057   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3058     return C;
3059 
3060   // canonicalize constant to RHS
3061   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3062       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3063     return DAG.getNode(Opcode, DL, VT, N1, N0);
3064 
3065   // fold vector ops
3066   if (VT.isVector()) {
3067     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3068       return FoldedVOp;
3069 
3070     // fold (add_sat x, 0) -> x, vector edition
3071     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3072       return N0;
3073   }
3074 
3075   // fold (add_sat x, 0) -> x
3076   if (isNullConstant(N1))
3077     return N0;
3078 
3079   // If it cannot overflow, transform into an add.
3080   if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3081     return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3082 
3083   return SDValue();
3084 }
3085 
3086 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3087                           bool ForceCarryReconstruction = false) {
3088   bool Masked = false;
3089 
3090   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3091   while (true) {
3092     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3093       V = V.getOperand(0);
3094       continue;
3095     }
3096 
3097     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3098       if (ForceCarryReconstruction)
3099         return V;
3100 
3101       Masked = true;
3102       V = V.getOperand(0);
3103       continue;
3104     }
3105 
3106     if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3107       return V;
3108 
3109     break;
3110   }
3111 
3112   // If this is not a carry, return.
3113   if (V.getResNo() != 1)
3114     return SDValue();
3115 
3116   if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3117       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3118     return SDValue();
3119 
3120   EVT VT = V->getValueType(0);
3121   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3122     return SDValue();
3123 
3124   // If the result is masked, then we can return it no matter what kind of
3125   // bool the target uses. If it isn't, then we need to make sure the bool
3126   // is represented as either 0 or 1 and takes no other values.
3127   if (Masked ||
3128       TLI.getBooleanContents(V.getValueType()) ==
3129           TargetLoweringBase::ZeroOrOneBooleanContent)
3130     return V;
3131 
3132   return SDValue();
3133 }
3134 
3135 /// Given the operands of an add/sub operation, see if the 2nd operand is a
3136 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3137 /// the opcode and bypass the mask operation.
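     /// This is sound because a value whose bits are all sign bits is either 0
     /// or -1; for those values the masked low bit equals the negation:
     /// (0 & 1) == 0 == -0 and (-1 & 1) == 1 == -(-1).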
3138 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3139                                  SelectionDAG &DAG, const SDLoc &DL) {
3140   if (N1.getOpcode() == ISD::ZERO_EXTEND)
3141     N1 = N1.getOperand(0);
3142 
3143   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3144     return SDValue();
3145 
3146   EVT VT = N0.getValueType();
3147   SDValue N10 = N1.getOperand(0);
3148   if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3149     N10 = N10.getOperand(0);
3150 
3151   if (N10.getValueType() != VT)
3152     return SDValue();
3153 
3154   if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3155     return SDValue();
3156 
3157   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3158   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3159   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3160 }
3161 
3162 /// Helper for doing combines based on N0 and N1 being added to each other.
3163 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3164                                              SDNode *LocReference) {
3165   EVT VT = N0.getValueType();
3166   SDLoc DL(LocReference);
3167 
3168   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
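       // This holds because shl commutes with negation in two's complement:
       // (0 - y) << n == 0 - (y << n) (mod 2^BW).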
3169   SDValue Y, N;
3170   if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3171     return DAG.getNode(ISD::SUB, DL, VT, N0,
3172                        DAG.getNode(ISD::SHL, DL, VT, Y, N));
3173 
3174   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3175     return V;
3176 
3177   // Look for:
3178   //   add (add x, 1), y
3179   // If the target does not prefer this form, then turn it into:
3180   //   sub y, (xor x, -1)
3181   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3182       N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3183       // Limit this to after legalization if the add has wrap flags
3184       (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3185                                      !N0->getFlags().hasNoSignedWrap()))) {
3186     SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3187     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3188   }
3189 
3190   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3191     // Hoist one-use subtraction by non-opaque constant:
3192     //   (x - C) + y  ->  (x + y) - C
3193     // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3194     if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3195       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3196       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3197     }
3198     // Hoist one-use subtraction from non-opaque constant:
3199     //   (C - x) + y  ->  (y - x) + C
3200     if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3201       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3202       return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3203     }
3204   }
3205 
3206   // add (mul x, C), x -> mul x, C+1
3207   if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3208       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3209       N0.hasOneUse()) {
3210     SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3211                                DAG.getConstant(1, DL, VT));
3212     return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3213   }
3214 
3215   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3216   // rather than 'add 0/-1' (the zext should get folded).
3217   // add (sext i1 Y), X --> sub X, (zext i1 Y)
3218   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3219       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3220       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3221     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3222     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3223   }
3224 
3225   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3226   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3227     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3228     if (TN->getVT() == MVT::i1) {
3229       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3230                                  DAG.getConstant(1, DL, VT));
3231       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3232     }
3233   }
3234 
3235   // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3236   if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3237       N1.getResNo() == 0)
3238     return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3239                        N0, N1.getOperand(0), N1.getOperand(2));
3240 
3241   // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3242   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3243     if (SDValue Carry = getAsCarry(TLI, N1))
3244       return DAG.getNode(ISD::UADDO_CARRY, DL,
3245                          DAG.getVTList(VT, Carry.getValueType()), N0,
3246                          DAG.getConstant(0, DL, VT), Carry);
3247 
3248   return SDValue();
3249 }
3250 
3251 SDValue DAGCombiner::visitADDC(SDNode *N) {
3252   SDValue N0 = N->getOperand(0);
3253   SDValue N1 = N->getOperand(1);
3254   EVT VT = N0.getValueType();
3255   SDLoc DL(N);
3256 
3257   // If the flag result is dead, turn this into an ADD.
3258   if (!N->hasAnyUseOfValue(1))
3259     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3260                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3261 
3262   // canonicalize constant to RHS.
3263   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3264   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3265   if (N0C && !N1C)
3266     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3267 
3268   // fold (addc x, 0) -> x + no carry out
3269   if (isNullConstant(N1))
3270     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3271                                         DL, MVT::Glue));
3272 
3273   // If it cannot overflow, transform into an add.
3274   if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3275     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3276                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3277 
3278   return SDValue();
3279 }
3280 
3281 /**
3282  * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3283  * then the flip also occurs if computing the inverse is the same cost.
3284  * This function returns an empty SDValue in case it cannot flip the boolean
3285  * without increasing the cost of the computation. If you want to flip a boolean
3286  * no matter what, use DAG.getLogicalNOT.
3287  */
3288 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3289                                   const TargetLowering &TLI,
3290                                   bool Force) {
3291   if (Force && isa<ConstantSDNode>(V))
3292     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3293 
3294   if (V.getOpcode() != ISD::XOR)
3295     return SDValue();
3296 
3297   if (DAG.isBoolConstant(V.getOperand(1)) == true)
3298     return V.getOperand(0);
3299   if (Force && isConstOrConstSplat(V.getOperand(1), false))
3300     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3301   return SDValue();
3302 }
3303 
3304 SDValue DAGCombiner::visitADDO(SDNode *N) {
3305   SDValue N0 = N->getOperand(0);
3306   SDValue N1 = N->getOperand(1);
3307   EVT VT = N0.getValueType();
3308   bool IsSigned = (ISD::SADDO == N->getOpcode());
3309 
3310   EVT CarryVT = N->getValueType(1);
3311   SDLoc DL(N);
3312 
3313   // If the flag result is dead, turn this into an ADD.
3314   if (!N->hasAnyUseOfValue(1))
3315     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3316                      DAG.getUNDEF(CarryVT));
3317 
3318   // canonicalize constant to RHS.
3319   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3320       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3321     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3322 
3323   // fold (addo x, 0) -> x + no carry out
3324   if (isNullOrNullSplat(N1))
3325     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3326 
3327   // If it cannot overflow, transform into an add.
3328   if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3329     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3330                      DAG.getConstant(0, DL, CarryVT));
3331 
3332   if (IsSigned) {
3333     // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3334     if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3335       return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3336                          DAG.getConstant(0, DL, VT), N0.getOperand(0));
3337   } else {
3338     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3339     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3340       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3341                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3342       return CombineTo(
3343           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3344     }
3345 
3346     if (SDValue Combined = visitUADDOLike(N0, N1, N))
3347       return Combined;
3348 
3349     if (SDValue Combined = visitUADDOLike(N1, N0, N))
3350       return Combined;
3351   }
3352 
3353   return SDValue();
3354 }
3355 
3356 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3357   EVT VT = N0.getValueType();
3358   if (VT.isVector())
3359     return SDValue();
3360 
3361   // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3362   // If Y + 1 cannot overflow.
3363   if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3364     SDValue Y = N1.getOperand(0);
3365     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3366     if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3367       return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3368                          N1.getOperand(2));
3369   }
3370 
3371   // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3372   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3373     if (SDValue Carry = getAsCarry(TLI, N1))
3374       return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3375                          DAG.getConstant(0, SDLoc(N), VT), Carry);
3376 
3377   return SDValue();
3378 }
3379 
3380 SDValue DAGCombiner::visitADDE(SDNode *N) {
3381   SDValue N0 = N->getOperand(0);
3382   SDValue N1 = N->getOperand(1);
3383   SDValue CarryIn = N->getOperand(2);
3384 
3385   // canonicalize constant to RHS
3386   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3387   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3388   if (N0C && !N1C)
3389     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3390                        N1, N0, CarryIn);
3391 
3392   // fold (adde x, y, false) -> (addc x, y)
3393   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3394     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3395 
3396   return SDValue();
3397 }
3398 
3399 SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3400   SDValue N0 = N->getOperand(0);
3401   SDValue N1 = N->getOperand(1);
3402   SDValue CarryIn = N->getOperand(2);
3403   SDLoc DL(N);
3404 
3405   // canonicalize constant to RHS
3406   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3407   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3408   if (N0C && !N1C)
3409     return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3410 
3411   // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3412   if (isNullConstant(CarryIn)) {
3413     if (!LegalOperations ||
3414         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3415       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3416   }
3417 
3418   // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3419   if (isNullConstant(N0) && isNullConstant(N1)) {
3420     EVT VT = N0.getValueType();
3421     EVT CarryVT = CarryIn.getValueType();
3422     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3423     AddToWorklist(CarryExt.getNode());
3424     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3425                                     DAG.getConstant(1, DL, VT)),
3426                      DAG.getConstant(0, DL, CarryVT));
3427   }
3428 
3429   if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3430     return Combined;
3431 
3432   if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3433     return Combined;
3434 
3435   // We want to avoid useless duplication.
3436   // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3437   // not a binary operation, it is not really possible to leverage this
3438   // existing mechanism for it. However, if more operations require the same
3439   // deduplication logic, then it may be worth generalizing.
3440   SDValue Ops[] = {N1, N0, CarryIn};
3441   SDNode *CSENode =
3442       DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3443   if (CSENode)
3444     return SDValue(CSENode, 0);
3445 
3446   return SDValue();
3447 }
3448 
3449 /**
3450  * If we are facing some sort of diamond carry propagation pattern, try to
3451  * break it up to generate something like:
3452  *   (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3453  *
3454  * The end result is usually an increase in the number of operations, but
3455  * because the carry is now linearized, other transforms can optimize the DAG.
3456  *
3457  * Patterns typically look something like
3458  *                (uaddo A, B)
3459  *                /          \
3460  *             Carry         Sum
3461  *               |             \
3462  *               | (uaddo_carry *, 0, Z)
3463  *               |       /
3464  *                \   Carry
3465  *                 |   /
3466  * (uaddo_carry X, *, *)
3467  *
3468  * But numerous variations exist. Our goal is to identify A, B, X and Z and
3469  * produce a combine with a single path for carry propagation.
3470  */
3471 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3472                                          SelectionDAG &DAG, SDValue X,
3473                                          SDValue Carry0, SDValue Carry1,
3474                                          SDNode *N) {
3475   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3476     return SDValue();
3477   if (Carry1.getOpcode() != ISD::UADDO)
3478     return SDValue();
3479 
3480   SDValue Z;
3481 
3482   /**
3483    * First, look for a suitable Z. It will present itself in the form of
3484    * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3485    */
3486   if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3487       isNullConstant(Carry0.getOperand(1))) {
3488     Z = Carry0.getOperand(2);
3489   } else if (Carry0.getOpcode() == ISD::UADDO &&
3490              isOneConstant(Carry0.getOperand(1))) {
3491     EVT VT = Carry0->getValueType(1);
3492     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3493   } else {
3494     // We couldn't find a suitable Z.
3495     return SDValue();
3496   }
3497 
3498 
3499   auto cancelDiamond = [&](SDValue A, SDValue B) {
3500     SDLoc DL(N);
3501     SDValue NewY =
3502         DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3503     Combiner.AddToWorklist(NewY.getNode());
3504     return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3505                        DAG.getConstant(0, DL, X.getValueType()),
3506                        NewY.getValue(1));
3507   };
3508 
3509   /**
3510    *         (uaddo A, B)
3511    *              |
3512    *             Sum
3513    *              |
3514    * (uaddo_carry *, 0, Z)
3515    */
3516   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3517     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3518   }
3519 
3520   /**
3521    * (uaddo_carry A, 0, Z)
3522    *         |
3523    *        Sum
3524    *         |
3525    *  (uaddo *, B)
3526    */
3527   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3528     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3529   }
3530 
3531   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3532     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3533   }
3534 
3535   return SDValue();
3536 }
3537 
3538 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3539 // match patterns like:
3540 //
3541 //          (uaddo A, B)            CarryIn
3542 //            |  \                     |
3543 //            |   \                    |
3544 //    PartialSum   PartialCarryOutX   /
3545 //            |        |             /
3546 //            |    ____|____________/
3547 //            |   /    |
3548 //     (uaddo *, *)    \________
3549 //       |  \                   \
3550 //       |   \                   |
3551 //       |    PartialCarryOutY   |
3552 //       |        \              |
3553 //       |         \            /
3554 //   AddCarrySum    |    ______/
3555 //                  |   /
3556 //   CarryOut = (or *, *)
3557 //
3558 // And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3559 //
3560 //    {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3561 //
3562 // Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3563 // with a single path for carry/borrow out propagation.
3564 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3565                                    SDValue N0, SDValue N1, SDNode *N) {
3566   SDValue Carry0 = getAsCarry(TLI, N0);
3567   if (!Carry0)
3568     return SDValue();
3569   SDValue Carry1 = getAsCarry(TLI, N1);
3570   if (!Carry1)
3571     return SDValue();
3572 
3573   unsigned Opcode = Carry0.getOpcode();
3574   if (Opcode != Carry1.getOpcode())
3575     return SDValue();
3576   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3577     return SDValue();
3578   // Guarantee identical type of CarryOut
3579   EVT CarryOutType = N->getValueType(0);
3580   if (CarryOutType != Carry0.getValue(1).getValueType() ||
3581       CarryOutType != Carry1.getValue(1).getValueType())
3582     return SDValue();
3583 
3584   // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3585   // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3586   if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3587     std::swap(Carry0, Carry1);
3588 
3589   // Check if nodes are connected in expected way.
3590   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3591       Carry1.getOperand(1) != Carry0.getValue(0))
3592     return SDValue();
3593 
3594   // The carry-in value must be on the right-hand side for subtraction.
3595   unsigned CarryInOperandNum =
3596       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3597   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3598     return SDValue();
3599   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3600 
3601   unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3602   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3603     return SDValue();
3604 
3605   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3606   CarryIn = getAsCarry(TLI, CarryIn, true);
3607   if (!CarryIn)
3608     return SDValue();
3609 
3610   SDLoc DL(N);
3611   CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3612                                   Carry1->getValueType(0));
3613   SDValue Merged =
3614       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3615                   Carry0.getOperand(1), CarryIn);
3616 
3617   // Note that because we have proven that the result of the UADDO/USUBO of A
3618   // and B feeds into the UADDO/USUBO that consumes the carry/borrow in, it
3619   // follows that if the first UADDO/USUBO overflows, the second UADDO/USUBO
3620   // cannot. For example, consider 8-bit numbers where 0xFF is the maximum
3621   // value.
3622   //
3623   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3624   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3625   //
3626   // This is important because it means that OR and XOR can be used to merge
3627   // carry flags, and that AND can return a constant zero.
3628   //
3629   // TODO: match other operations that can merge flags (ADD, etc)
3630   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3631   if (N->getOpcode() == ISD::AND)
3632     return DAG.getConstant(0, DL, CarryOutType);
3633   return Merged.getValue(1);
3634 }
3635 
3636 SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3637                                           SDValue CarryIn, SDNode *N) {
3638   // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3639   // carry.
3640   if (isBitwiseNot(N0))
3641     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3642       SDLoc DL(N);
3643       SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3644                                 N0.getOperand(0), NotC);
3645       return CombineTo(
3646           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3647     }
3648 
3649   // Iff the flag result is dead:
3650   // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3651   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3652   // or the dependency between the instructions.
3653   if ((N0.getOpcode() == ISD::ADD ||
3654        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3655         N0.getValue(1) != CarryIn)) &&
3656       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3657     return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3658                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3659 
3660   /**
3661    * When one of the uaddo_carry arguments is itself a carry, we may be facing
3662    * a diamond carry propagation, in which case we try to transform the DAG
3663    * to ensure linear carry propagation if that is possible.
3664    */
3665   if (auto Y = getAsCarry(TLI, N1)) {
3666     // Because both are carries, Y and Z can be swapped.
3667     if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3668       return R;
3669     if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3670       return R;
3671   }
3672 
3673   return SDValue();
3674 }
3675 
3676 SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3677                                           SDValue CarryIn, SDNode *N) {
3678   // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3679   if (isBitwiseNot(N0)) {
3680     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3681       return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3682                          N0.getOperand(0), NotC);
3683   }
3684 
3685   return SDValue();
3686 }
3687 
3688 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3689   SDValue N0 = N->getOperand(0);
3690   SDValue N1 = N->getOperand(1);
3691   SDValue CarryIn = N->getOperand(2);
3692   SDLoc DL(N);
3693 
3694   // canonicalize constant to RHS
3695   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3696   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3697   if (N0C && !N1C)
3698     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3699 
3700   // fold (saddo_carry x, y, false) -> (saddo x, y)
3701   if (isNullConstant(CarryIn)) {
3702     if (!LegalOperations ||
3703         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3704       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3705   }
3706 
3707   if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3708     return Combined;
3709 
3710   if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3711     return Combined;
3712 
3713   return SDValue();
3714 }
3715 
3716 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3717 // clamp/truncation if necessary.
3718 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3719                                    SDValue RHS, SelectionDAG &DAG,
3720                                    const SDLoc &DL) {
3721   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3722          "Illegal truncation");
3723 
3724   if (DstVT == SrcVT)
3725     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3726 
3727   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3728   // clamping RHS.
3729   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3730                                           DstVT.getScalarSizeInBits());
3731   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3732     return SDValue();
3733 
3734   SDValue SatLimit =
3735       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3736                                            DstVT.getScalarSizeInBits()),
3737                       DL, SrcVT);
3738   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3739   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3740   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3741   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3742 }
3743 
3744 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3745 // usubsat(a,b), optionally as a truncated type.
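     // E.g. for i8: umax(100, 30) - 30 == 70 == usubsat(100, 30), while
     // umax(20, 30) - 30 == 0 == usubsat(20, 30).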
3746 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3747   if (N->getOpcode() != ISD::SUB ||
3748       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3749     return SDValue();
3750 
3751   EVT SubVT = N->getValueType(0);
3752   SDValue Op0 = N->getOperand(0);
3753   SDValue Op1 = N->getOperand(1);
3754 
3755   // Try to find umax(a,b) - b or a - umin(a,b) patterns;
3756   // they may be converted to usubsat(a,b).
3757   if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3758     SDValue MaxLHS = Op0.getOperand(0);
3759     SDValue MaxRHS = Op0.getOperand(1);
3760     if (MaxLHS == Op1)
3761       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3762     if (MaxRHS == Op1)
3763       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3764   }
3765 
3766   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3767     SDValue MinLHS = Op1.getOperand(0);
3768     SDValue MinRHS = Op1.getOperand(1);
3769     if (MinLHS == Op0)
3770       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3771     if (MinRHS == Op0)
3772       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3773   }
3774 
3775   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3776   if (Op1.getOpcode() == ISD::TRUNCATE &&
3777       Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3778       Op1.getOperand(0).hasOneUse()) {
3779     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3780     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3781     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3782       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3783                                  DAG, DL);
3784     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3785       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3786                                  DAG, DL);
3787   }
3788 
3789   return SDValue();
3790 }
3791 
3792 // Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3793 // counting leading ones. Broadly, it replaces the subtraction with a left
3794 // shift.
3795 //
3796 // * DAG Legalisation Pattern:
3797 //
3798 //     (sub (ctlz (zeroextend (not Src)))
3799 //          BitWidthDiff)
3800 //
3801 //       if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3802 //       -->
3803 //
3804 //     (ctlz_zero_undef (not (shl (anyextend Src)
3805 //                                BitWidthDiff)))
3806 //
3807 // * Type Legalisation Pattern:
3808 //
3809 //     (sub (ctlz (and (xor Src XorMask)
3810 //                     AndMask))
3811 //          BitWidthDiff)
3812 //
3813 //       if AndMask has only trailing ones
3814 //       and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3815 //       and XorMask has more trailing ones than AndMask
3816 //       -->
3817 //
3818 //     (ctlz_zero_undef (not (shl Src BitWidthDiff)))
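     //
     // Worked example for the DAG legalisation pattern, counting the leading
     // ones of i8 Src == 0b11100000 in an i32 node (BitWidthDiff == 24):
     //   ctlz(zext(not Src)) - 24 == ctlz(0x0000001F) - 24 == 27 - 24 == 3
     //   ctlz_zero_undef(not(shl(anyext Src, 24))) == ctlz(0x1FFFFFFF) == 3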
3819 template <class MatchContextClass>
3820 static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
3821   const SDLoc DL(N);
3822   SDValue N0 = N->getOperand(0);
3823   EVT VT = N0.getValueType();
3824   unsigned BitWidth = VT.getScalarSizeInBits();
3825 
3826   MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
3827 
3828   APInt AndMask;
3829   APInt XorMask;
3830   APInt BitWidthDiff;
3831 
3832   SDValue CtlzOp;
3833   SDValue Src;
3834 
3835   if (!sd_context_match(
3836           N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
3837     return SDValue();
3838 
3839   if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
3840     // DAG Legalisation Pattern:
3841     // (sub (ctlz (zero_extend (not Op))) BitWidthDiff)
3842     if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3843       return SDValue();
3844 
3845     Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
3846   } else if (sd_context_match(CtlzOp, Matcher,
3847                               m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
3848                                     m_ConstInt(AndMask)))) {
3849     // Type Legalisation Pattern:
3850     // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
3851     unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3852     if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
3853       return SDValue();
3854   } else
3855     return SDValue();
3856 
3857   SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
3858   SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
3859   SDValue Not =
3860       Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
3861 
3862   return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
3863 }
3864 
3865 // Since it may not be valid to emit a fold to zero for vector initializers,
3866 // check if we can before folding.
3867 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3868                              SelectionDAG &DAG, bool LegalOperations) {
3869   if (!VT.isVector())
3870     return DAG.getConstant(0, DL, VT);
3871   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3872     return DAG.getConstant(0, DL, VT);
3873   return SDValue();
3874 }
3875 
3876 SDValue DAGCombiner::visitSUB(SDNode *N) {
3877   SDValue N0 = N->getOperand(0);
3878   SDValue N1 = N->getOperand(1);
3879   EVT VT = N0.getValueType();
3880   unsigned BitWidth = VT.getScalarSizeInBits();
3881   SDLoc DL(N);
3882 
3883   auto PeekThroughFreeze = [](SDValue N) {
3884     if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3885       return N->getOperand(0);
3886     return N;
3887   };
3888 
3889   if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
3890     return V;
3891 
3892   // fold (sub x, x) -> 0
3893   // FIXME: Refactor this and xor and other similar operations together.
3894   if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3895     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3896 
3897   // fold (sub c1, c2) -> c3
3898   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3899     return C;
3900 
3901   // fold vector ops
3902   if (VT.isVector()) {
3903     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3904       return FoldedVOp;
3905 
3906     // fold (sub x, 0) -> x, vector edition
3907     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3908       return N0;
3909   }
3910 
3911   if (SDValue NewSel = foldBinOpIntoSelect(N))
3912     return NewSel;
3913 
3914   // fold (sub x, c) -> (add x, -c)
3915   if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3916     return DAG.getNode(ISD::ADD, DL, VT, N0,
3917                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3918 
3919   if (isNullOrNullSplat(N0)) {
3920     // Right-shifting everything out but the sign bit followed by negation is
3921     // the same as flipping arithmetic/logical shift type without the negation:
3922     // -(X >>u 31) -> (X >>s 31)
3923     // -(X >>s 31) -> (X >>u 31)
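         // E.g. for i32: if X is negative, X >>u 31 == 1 and 0 - 1 == -1 ==
         // X >>s 31; if X is non-negative, both sides are 0.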
3924     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3925       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3926       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3927         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3928         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3929           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3930       }
3931     }
3932 
3933     // 0 - X --> 0 if the sub is NUW.
3934     if (N->getFlags().hasNoUnsignedWrap())
3935       return N0;
3936 
3937     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3938       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3939       // N1 must be 0 because negating the minimum signed value is undefined.
3940       if (N->getFlags().hasNoSignedWrap())
3941         return N0;
3942 
3943       // 0 - X --> X if X is 0 or the minimum signed value.
3944       return N1;
3945     }
3946 
3947     // Convert 0 - abs(x).
3948     if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3949         !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3950       if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3951         return Result;
3952 
3953     // Similar to the previous rule, but this time targeting an expanded abs.
3954     // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
3955     // as well as
3956     // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
3957     // Note that these two are applicable to both signed and unsigned min/max.
3958     SDValue X;
3959     SDValue S0;
3960     auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
3961     if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
3962                                       m_UMax(m_Value(X), NegPat),
3963                                       m_SMin(m_Value(X), NegPat),
3964                                       m_UMin(m_Value(X), NegPat))))) {
3965       unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
3966       if (hasOperation(NewOpc, VT))
3967         return DAG.getNode(NewOpc, DL, VT, X, S0);
3968     }
3969 
3970     // Fold neg(splat(neg(x))) -> splat(x)
3971     if (VT.isVector()) {
3972       SDValue N1S = DAG.getSplatValue(N1, true);
3973       if (N1S && N1S.getOpcode() == ISD::SUB &&
3974           isNullConstant(N1S.getOperand(0)))
3975         return DAG.getSplat(VT, DL, N1S.getOperand(1));
3976     }
3977   }
3978 
3979   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3980   if (isAllOnesOrAllOnesSplat(N0))
3981     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3982 
3983   // fold (A - (0-B)) -> A+B
3984   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3985     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3986 
3987   // fold A-(A-B) -> B
3988   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3989     return N1.getOperand(1);
3990 
3991   // fold (A+B)-A -> B
3992   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3993     return N0.getOperand(1);
3994 
3995   // fold (A+B)-B -> A
3996   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3997     return N0.getOperand(0);
3998 
3999   // fold (A+C1)-C2 -> A+(C1-C2)
4000   if (N0.getOpcode() == ISD::ADD) {
4001     SDValue N01 = N0.getOperand(1);
4002     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4003       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4004   }
4005 
4006   // fold C2-(A+C1) -> (C2-C1)-A
4007   if (N1.getOpcode() == ISD::ADD) {
4008     SDValue N11 = N1.getOperand(1);
4009     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4010       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4011   }
4012 
4013   // fold (A-C1)-C2 -> A-(C1+C2)
4014   if (N0.getOpcode() == ISD::SUB) {
4015     SDValue N01 = N0.getOperand(1);
4016     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4017       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4018   }
4019 
4020   // fold (c1-A)-c2 -> (c1-c2)-A
4021   if (N0.getOpcode() == ISD::SUB) {
4022     SDValue N00 = N0.getOperand(0);
4023     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4024       return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4025   }
4026 
4027   SDValue A, B, C;
4028 
4029   // fold ((A+(B+C))-B) -> A+C
4030   if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4031     return DAG.getNode(ISD::ADD, DL, VT, A, C);
4032 
4033   // fold ((A+(B-C))-B) -> A-C
4034   if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4035     return DAG.getNode(ISD::SUB, DL, VT, A, C);
4036 
4037   // fold ((A-(B-C))-C) -> A-B
4038   if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4039     return DAG.getNode(ISD::SUB, DL, VT, A, B);
4040 
4041   // fold (A-(B-C)) -> A+(C-B)
4042   if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4043     return DAG.getNode(ISD::ADD, DL, VT, N0,
4044                        DAG.getNode(ISD::SUB, DL, VT, C, B));
4045 
4046   // A - (A & B)  ->  A & (~B)
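  // The set bits of (A & B) are a subset of A's set bits, so the subtraction
  // borrows from no bit and simply clears those bits, yielding A & ~B.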
4047   if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4048       (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4049     return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4050 
4051   // fold (A - (-B * C)) -> (A + (B * C))
4052   if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4053     return DAG.getNode(ISD::ADD, DL, VT, N0,
4054                        DAG.getNode(ISD::MUL, DL, VT, B, C));
4055 
4056   // If either operand of a sub is undef, the result is undef
4057   if (N0.isUndef())
4058     return N0;
4059   if (N1.isUndef())
4060     return N1;
4061 
4062   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4063     return V;
4064 
4065   if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4066     return V;
4067 
4068   // Try to match the fixed-width AVGCEIL pattern.
4069   if (SDValue V = foldSubToAvg(N, DL))
4070     return V;
4071 
4072   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4073     return V;
4074 
4075   if (SDValue V = foldSubToUSubSat(VT, N, DL))
4076     return V;
4077 
4078   // (A - B) - 1  ->  add (xor B, -1), A
4079   if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
4080     return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4081 
4082   // Look for:
4083   //   sub y, (xor x, -1)
4084   // And if the target does not like this form then turn into:
4085   //   add (add x, y), 1
4086   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4087     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4088     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4089   }
4090 
4091   // Hoist one-use addition by non-opaque constant:
4092   //   (x + C) - y  ->  (x - y) + C
4093   if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4094       N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4095       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4096     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4097     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4098   }
4099   // y - (x + C)  ->  (y - x) - C
4100   if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4101       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4102     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4103     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4104   }
4105   // (x - C) - y  ->  (x - y) - C
4106   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4107   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4108       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4109     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4110     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4111   }
4112   // (C - x) - y  ->  C - (x + y)
4113   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4114       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4115     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4116     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4117   }
4118 
4119   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4120   // rather than 'sub 0/1' (the sext should get folded).
4121   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
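  // For example, when Y is 1: (zext i1 1) is 1 and (sext i1 1) is -1, so
  // X - 1 == X + (-1); when Y is 0, both forms leave X unchanged.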
4122   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4123       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4124       TLI.getBooleanContents(VT) ==
4125           TargetLowering::ZeroOrNegativeOneBooleanContent) {
4126     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4127     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4128   }
4129 
4130   // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4131   if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4132       sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4133       sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4134     return DAG.getNode(ISD::ABS, DL, VT, A);
4135 
4136   // If the relocation model supports it, consider symbol offsets.
4137   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4138     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4139       // fold (sub Sym+c1, Sym+c2) -> c1-c2
4140       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4141         if (GA->getGlobal() == GB->getGlobal())
4142           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4143                                  DL, VT);
4144     }
4145 
4146   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4147   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4148     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4149     if (TN->getVT() == MVT::i1) {
4150       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4151                                  DAG.getConstant(1, DL, VT));
4152       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4153     }
4154   }
4155 
4156   // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4157   if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4158     const APInt &IntVal = N1.getConstantOperandAPInt(0);
4159     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4160   }
4161 
4162   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4163   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4164     APInt NewStep = -N1.getConstantOperandAPInt(0);
4165     return DAG.getNode(ISD::ADD, DL, VT, N0,
4166                        DAG.getStepVector(DL, VT, NewStep));
4167   }
4168 
4169   // Prefer an add for more folding potential and possibly better codegen:
4170   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4171   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4172     SDValue ShAmt = N1.getOperand(1);
4173     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4174     if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4175       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4176       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4177     }
4178   }
4179 
4180   // As with the previous fold, prefer add for more folding potential.
4181   // Subtracting SMIN/0 is the same as adding SMIN/0:
4182   // N0 - (X << BW-1) --> N0 + (X << BW-1)
4183   if (N1.getOpcode() == ISD::SHL) {
4184     ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4185     if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4186       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4187   }
4188 
4189   // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4190   if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4191       N0.getResNo() == 0 && N0.hasOneUse())
4192     return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4193                        N0.getOperand(0), N1, N0.getOperand(2));
4194 
4195   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4196     // (sub Carry, X)  ->  (uaddo_carry (sub 0, X), 0, Carry)
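    // This holds because Carry is 0 or 1: Carry - X == (0 - X) + Carry,
    // which is exactly what the UADDO_CARRY node computes below.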
4197     if (SDValue Carry = getAsCarry(TLI, N0)) {
4198       SDValue X = N1;
4199       SDValue Zero = DAG.getConstant(0, DL, VT);
4200       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4201       return DAG.getNode(ISD::UADDO_CARRY, DL,
4202                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4203                          Carry);
4204     }
4205   }
4206 
4207   // If there's no chance of borrowing from adjacent bits, then sub is xor:
4208   // sub C0, X --> xor X, C0
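  // For example, if C0 = 0b1100 and known bits prove X can only ever have
  // bit 2 set, then C0 - X == C0 ^ X for both possible values of X, so the
  // subtraction can never borrow and is equivalent to an XOR.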
4209   if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4210     if (!C0->isOpaque()) {
4211       const APInt &C0Val = C0->getAPIntValue();
4212       const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4213       if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4214         return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4215     }
4216   }
4217 
4218   // smax(a,b) - smin(a,b) --> abds(a,b)
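  // For example, with a = 3 and b = 10: smax - smin = 10 - 3 = 7, which is
  // the absolute difference abds(a, b).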
4219   if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4220       sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4221       sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4222     return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4223 
4224   // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4225   if (hasOperation(ISD::ABDS, VT) &&
4226       sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4227       sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4228     return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4229 
4230   // umax(a,b) - umin(a,b) --> abdu(a,b)
4231   if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4232       sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4233       sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4234     return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4235 
4236   // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4237   if (hasOperation(ISD::ABDU, VT) &&
4238       sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4239       sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4240     return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4241 
4242   return SDValue();
4243 }
4244 
4245 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4246   unsigned Opcode = N->getOpcode();
4247   SDValue N0 = N->getOperand(0);
4248   SDValue N1 = N->getOperand(1);
4249   EVT VT = N0.getValueType();
4250   bool IsSigned = Opcode == ISD::SSUBSAT;
4251   SDLoc DL(N);
4252 
4253   // fold (sub_sat x, undef) -> 0
4254   if (N0.isUndef() || N1.isUndef())
4255     return DAG.getConstant(0, DL, VT);
4256 
4257   // fold (sub_sat x, x) -> 0
4258   if (N0 == N1)
4259     return DAG.getConstant(0, DL, VT);
4260 
4261   // fold (sub_sat c1, c2) -> c3
4262   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4263     return C;
4264 
4265   // fold vector ops
4266   if (VT.isVector()) {
4267     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4268       return FoldedVOp;
4269 
4270     // fold (sub_sat x, 0) -> x, vector edition
4271     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4272       return N0;
4273   }
4274 
4275   // fold (sub_sat x, 0) -> x
4276   if (isNullConstant(N1))
4277     return N0;
4278 
4279   // If it cannot overflow, transform into a sub.
4280   if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4281     return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4282 
4283   return SDValue();
4284 }
4285 
4286 SDValue DAGCombiner::visitSUBC(SDNode *N) {
4287   SDValue N0 = N->getOperand(0);
4288   SDValue N1 = N->getOperand(1);
4289   EVT VT = N0.getValueType();
4290   SDLoc DL(N);
4291 
4292   // If the flag result is dead, turn this into an SUB.
4293   if (!N->hasAnyUseOfValue(1))
4294     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4295                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4296 
4297   // fold (subc x, x) -> 0 + no borrow
4298   if (N0 == N1)
4299     return CombineTo(N, DAG.getConstant(0, DL, VT),
4300                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4301 
4302   // fold (subc x, 0) -> x + no borrow
4303   if (isNullConstant(N1))
4304     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4305 
4306   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4307   if (isAllOnesConstant(N0))
4308     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4309                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4310 
4311   return SDValue();
4312 }
4313 
4314 SDValue DAGCombiner::visitSUBO(SDNode *N) {
4315   SDValue N0 = N->getOperand(0);
4316   SDValue N1 = N->getOperand(1);
4317   EVT VT = N0.getValueType();
4318   bool IsSigned = (ISD::SSUBO == N->getOpcode());
4319 
4320   EVT CarryVT = N->getValueType(1);
4321   SDLoc DL(N);
4322 
4323   // If the flag result is dead, turn this into an SUB.
4324   if (!N->hasAnyUseOfValue(1))
4325     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4326                      DAG.getUNDEF(CarryVT));
4327 
4328   // fold (subo x, x) -> 0 + no borrow
4329   if (N0 == N1)
4330     return CombineTo(N, DAG.getConstant(0, DL, VT),
4331                      DAG.getConstant(0, DL, CarryVT));
4332 
4333   // fold (subo x, c) -> (addo x, -c)
4334   if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4335     if (IsSigned && !N1C->isMinSignedValue())
4336       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4337                          DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4338 
4339   // fold (subo x, 0) -> x + no borrow
4340   if (isNullOrNullSplat(N1))
4341     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4342 
4343   // If it cannot overflow, transform into a sub.
4344   if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4345     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4346                      DAG.getConstant(0, DL, CarryVT));
4347 
4348   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4349   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4350     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4351                      DAG.getConstant(0, DL, CarryVT));
4352 
4353   return SDValue();
4354 }
4355 
4356 SDValue DAGCombiner::visitSUBE(SDNode *N) {
4357   SDValue N0 = N->getOperand(0);
4358   SDValue N1 = N->getOperand(1);
4359   SDValue CarryIn = N->getOperand(2);
4360 
4361   // fold (sube x, y, false) -> (subc x, y)
4362   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4363     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4364 
4365   return SDValue();
4366 }
4367 
4368 SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4369   SDValue N0 = N->getOperand(0);
4370   SDValue N1 = N->getOperand(1);
4371   SDValue CarryIn = N->getOperand(2);
4372 
4373   // fold (usubo_carry x, y, false) -> (usubo x, y)
4374   if (isNullConstant(CarryIn)) {
4375     if (!LegalOperations ||
4376         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4377       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4378   }
4379 
4380   return SDValue();
4381 }
4382 
4383 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4384   SDValue N0 = N->getOperand(0);
4385   SDValue N1 = N->getOperand(1);
4386   SDValue CarryIn = N->getOperand(2);
4387 
4388   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4389   if (isNullConstant(CarryIn)) {
4390     if (!LegalOperations ||
4391         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4392       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4393   }
4394 
4395   return SDValue();
4396 }
4397 
4398 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4399 // UMULFIXSAT here.
4400 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4401   SDValue N0 = N->getOperand(0);
4402   SDValue N1 = N->getOperand(1);
4403   SDValue Scale = N->getOperand(2);
4404   EVT VT = N0.getValueType();
4405 
4406   // fold (mulfix x, undef, scale) -> 0
4407   if (N0.isUndef() || N1.isUndef())
4408     return DAG.getConstant(0, SDLoc(N), VT);
4409 
4410   // Canonicalize constant to RHS (vector doesn't have to splat)
4411   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4412       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4413     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4414 
4415   // fold (mulfix x, 0, scale) -> 0
4416   if (isNullConstant(N1))
4417     return DAG.getConstant(0, SDLoc(N), VT);
4418 
4419   return SDValue();
4420 }
4421 
4422 template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4423   SDValue N0 = N->getOperand(0);
4424   SDValue N1 = N->getOperand(1);
4425   EVT VT = N0.getValueType();
4426   unsigned BitWidth = VT.getScalarSizeInBits();
4427   SDLoc DL(N);
4428   bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4429   MatchContextClass Matcher(DAG, TLI, N);
4430 
4431   // fold (mul x, undef) -> 0
4432   if (N0.isUndef() || N1.isUndef())
4433     return DAG.getConstant(0, DL, VT);
4434 
4435   // fold (mul c1, c2) -> c1*c2
4436   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4437     return C;
4438 
4439   // canonicalize constant to RHS (vector doesn't have to splat)
4440   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4441       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4442     return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4443 
4444   bool N1IsConst = false;
4445   bool N1IsOpaqueConst = false;
4446   APInt ConstValue1;
4447 
4448   // fold vector ops
4449   if (VT.isVector()) {
4450     // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4451     if (!UseVP)
4452       if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4453         return FoldedVOp;
4454 
4455     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4456     assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4457            "Splat APInt should be element width");
4458   } else {
4459     N1IsConst = isa<ConstantSDNode>(N1);
4460     if (N1IsConst) {
4461       ConstValue1 = N1->getAsAPIntVal();
4462       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4463     }
4464   }
4465 
4466   // fold (mul x, 0) -> 0
4467   if (N1IsConst && ConstValue1.isZero())
4468     return N1;
4469 
4470   // fold (mul x, 1) -> x
4471   if (N1IsConst && ConstValue1.isOne())
4472     return N0;
4473 
4474   if (!UseVP)
4475     if (SDValue NewSel = foldBinOpIntoSelect(N))
4476       return NewSel;
4477 
4478   // fold (mul x, -1) -> 0-x
4479   if (N1IsConst && ConstValue1.isAllOnes())
4480     return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4481 
4482   // fold (mul x, (1 << c)) -> x << c
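  // For example, (mul x, 8) becomes (shl x, 3).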
4483   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4484       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4485     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4486       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4487       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4488       return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
4489     }
4490   }
4491 
4492   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4493   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4494     unsigned Log2Val = (-ConstValue1).logBase2();
4495 
4496     // FIXME: If the input is something that is easily negated (e.g. a
4497     // single-use add), we should put the negate there.
4498     return Matcher.getNode(
4499         ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4500         Matcher.getNode(ISD::SHL, DL, VT, N0,
4501                         DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4502   }
4503 
4504   // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4505   // hi result is in use, in case we hit this mid-legalization.
4506   if (!UseVP) {
4507     for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4508       if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4509         SDVTList LoHiVT = DAG.getVTList(VT, VT);
4510         // TODO: Can we match commutable operands with getNodeIfExists?
4511         if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4512           if (LoHi->hasAnyUseOfValue(1))
4513             return SDValue(LoHi, 0);
4514         if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4515           if (LoHi->hasAnyUseOfValue(1))
4516             return SDValue(LoHi, 0);
4517       }
4518     }
4519   }
4520 
4521   // Try to transform:
4522   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4523   // mul x, (2^N + 1) --> add (shl x, N), x
4524   // mul x, (2^N - 1) --> sub (shl x, N), x
4525   // Examples: x * 33 --> (x << 5) + x
4526   //           x * 15 --> (x << 4) - x
4527   //           x * -33 --> -((x << 5) + x)
4528   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4529   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4530   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4531   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4532   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4533   //           x * 0xf800 --> (x << 16) - (x << 11)
4534   //           x * -0x8800 --> -((x << 15) + (x << 11))
4535   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
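  // Illustrative trace for x * 20: MulC = 20 = 0b10100, so TZeros = 2 and
  // MulC >> 2 = 5; 5 - 1 = 4 is a power of 2, so MathOp = ISD::ADD with
  // ShAmt = 2 + 2 = 4, producing (add (shl x, 4), (shl x, 2)).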
4536   if (!UseVP && N1IsConst &&
4537       TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4538     // TODO: We could handle more general decomposition of any constant by
4539     //       having the target set a limit on number of ops and making a
4540     //       callback to determine that sequence (similar to sqrt expansion).
4541     unsigned MathOp = ISD::DELETED_NODE;
4542     APInt MulC = ConstValue1.abs();
4543     // The constant `2` should be treated as (2^0 + 1).
4544     unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4545     MulC.lshrInPlace(TZeros);
4546     if ((MulC - 1).isPowerOf2())
4547       MathOp = ISD::ADD;
4548     else if ((MulC + 1).isPowerOf2())
4549       MathOp = ISD::SUB;
4550 
4551     if (MathOp != ISD::DELETED_NODE) {
4552       unsigned ShAmt =
4553           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4554       ShAmt += TZeros;
4555       assert(ShAmt < BitWidth &&
4556              "multiply-by-constant generated out of bounds shift");
4557       SDValue Shl =
4558           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4559       SDValue R =
4560           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4561                                DAG.getNode(ISD::SHL, DL, VT, N0,
4562                                            DAG.getConstant(TZeros, DL, VT)))
4563                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
4564       if (ConstValue1.isNegative())
4565         R = DAG.getNegative(R, DL, VT);
4566       return R;
4567     }
4568   }
4569 
4570   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4571   if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4572     SDValue N01 = N0.getOperand(1);
4573     if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4574       return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4575   }
4576 
4577   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4578   // use.
4579   {
4580     SDValue Sh, Y;
4581 
4582     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
4583     if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4584         isConstantOrConstantVector(N0.getOperand(1))) {
4585       Sh = N0; Y = N1;
4586     } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4587                isConstantOrConstantVector(N1.getOperand(1))) {
4588       Sh = N1; Y = N0;
4589     }
4590 
4591     if (Sh.getNode()) {
4592       SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4593       return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4594     }
4595   }
4596 
4597   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4598   if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4599       DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4600       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4601       isMulAddWithConstProfitable(N, N0, N1))
4602     return Matcher.getNode(
4603         ISD::ADD, DL, VT,
4604         Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4605         Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4606 
4607   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4608   ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4609   if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4610     const APInt &C0 = N0.getConstantOperandAPInt(0);
4611     const APInt &C1 = NC1->getAPIntValue();
4612     return DAG.getVScale(DL, VT, C0 * C1);
4613   }
4614 
4615   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4616   APInt MulVal;
4617   if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4618       ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4619     const APInt &C0 = N0.getConstantOperandAPInt(0);
4620     APInt NewStep = C0 * MulVal;
4621     return DAG.getStepVector(DL, VT, NewStep);
4622   }
4623 
4624   // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4625   SDValue X;
4626   if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4627       sd_context_match(
4628           N, Matcher,
4629           m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4630                 m_Deferred(X)))) {
4631     return Matcher.getNode(ISD::ABS, DL, VT, X);
4632   }
4633 
4634   // Fold (mul x, 0/undef) -> 0 and
4635   //      (mul x, 1) -> x
4636   // together into and(x, mask).
4637   // We can replace vectors with '0' and '1' factors with a clearing mask.
4638   if (VT.isFixedLengthVector()) {
4639     unsigned NumElts = VT.getVectorNumElements();
4640     SmallBitVector ClearMask;
4641     ClearMask.reserve(NumElts);
4642     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4643       if (!V || V->isZero()) {
4644         ClearMask.push_back(true);
4645         return true;
4646       }
4647       ClearMask.push_back(false);
4648       return V->isOne();
4649     };
4650     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4651         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4652       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4653       EVT LegalSVT = N1.getOperand(0).getValueType();
4654       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4655       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4656       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4657       for (unsigned I = 0; I != NumElts; ++I)
4658         if (ClearMask[I])
4659           Mask[I] = Zero;
4660       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4661     }
4662   }
4663 
4664   // reassociate mul
4665   // TODO: Change reassociateOps to support vp ops.
4666   if (!UseVP)
4667     if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4668       return RMUL;
4669 
4670   // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4671   // TODO: Change reassociateReduction to support vp ops.
4672   if (!UseVP)
4673     if (SDValue SD =
4674             reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4675       return SD;
4676 
4677   // Simplify the operands using demanded-bits information.
4678   if (SimplifyDemandedBits(SDValue(N, 0)))
4679     return SDValue(N, 0);
4680 
4681   return SDValue();
4682 }
4683 
4684 /// Return true if divmod libcall is available.
4685 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4686                                      const TargetLowering &TLI) {
4687   RTLIB::Libcall LC;
4688   EVT NodeType = Node->getValueType(0);
4689   if (!NodeType.isSimple())
4690     return false;
4691   switch (NodeType.getSimpleVT().SimpleTy) {
4692   default: return false; // No libcall for vector types.
4693   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4694   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4695   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4696   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4697   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4698   }
4699 
4700   return TLI.getLibcallName(LC) != nullptr;
4701 }
4702 
4703 /// Issue divrem if both quotient and remainder are needed.
4704 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4705   if (Node->use_empty())
4706     return SDValue(); // This is a dead node, leave it alone.
4707 
4708   unsigned Opcode = Node->getOpcode();
4709   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4710   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4711 
4712   // DIVREM libcalls can still handle types that are not legal.
4713   EVT VT = Node->getValueType(0);
4714   if (VT.isVector() || !VT.isInteger())
4715     return SDValue();
4716 
4717   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4718     return SDValue();
4719 
4720   // If DIVREM is going to get expanded into a libcall,
4721   // but there is no libcall available, then don't combine.
4722   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4723       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4724     return SDValue();
4725 
4726   // If div is legal, it's better to do the normal expansion
4727   unsigned OtherOpcode = 0;
4728   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4729     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4730     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4731       return SDValue();
4732   } else {
4733     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4734     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4735       return SDValue();
4736   }
4737 
4738   SDValue Op0 = Node->getOperand(0);
4739   SDValue Op1 = Node->getOperand(1);
4740   SDValue combined;
4741   for (SDNode *User : Op0->users()) {
4742     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4743         User->use_empty())
4744       continue;
4745     // Convert the other matching node(s), too;
4746     // otherwise, the DIVREM may get target-legalized into something
4747     // target-specific that we won't be able to recognize.
4748     unsigned UserOpc = User->getOpcode();
4749     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4750         User->getOperand(0) == Op0 &&
4751         User->getOperand(1) == Op1) {
4752       if (!combined) {
4753         if (UserOpc == OtherOpcode) {
4754           SDVTList VTs = DAG.getVTList(VT, VT);
4755           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4756         } else if (UserOpc == DivRemOpc) {
4757           combined = SDValue(User, 0);
4758         } else {
4759           assert(UserOpc == Opcode);
4760           continue;
4761         }
4762       }
4763       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4764         CombineTo(User, combined);
4765       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4766         CombineTo(User, combined.getValue(1));
4767     }
4768   }
4769   return combined;
4770 }
4771 
4772 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4773   SDValue N0 = N->getOperand(0);
4774   SDValue N1 = N->getOperand(1);
4775   EVT VT = N->getValueType(0);
4776   SDLoc DL(N);
4777 
4778   unsigned Opc = N->getOpcode();
4779   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4780   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4781 
4782   // X / undef -> undef
4783   // X % undef -> undef
4784   // X / 0 -> undef
4785   // X % 0 -> undef
4786   // NOTE: This includes vectors where any divisor element is zero/undef.
4787   if (DAG.isUndef(Opc, {N0, N1}))
4788     return DAG.getUNDEF(VT);
4789 
4790   // undef / X -> 0
4791   // undef % X -> 0
4792   if (N0.isUndef())
4793     return DAG.getConstant(0, DL, VT);
4794 
4795   // 0 / X -> 0
4796   // 0 % X -> 0
4797   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4798   if (N0C && N0C->isZero())
4799     return N0;
4800 
4801   // X / X -> 1
4802   // X % X -> 0
4803   if (N0 == N1)
4804     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4805 
4806   // X / 1 -> X
4807   // X % 1 -> 0
4808   // If this is a boolean op (single-bit element type), we can't have
4809   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4810   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4811   // it's a 1.
4812   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4813     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4814 
4815   return SDValue();
4816 }
4817 
4818 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4819   SDValue N0 = N->getOperand(0);
4820   SDValue N1 = N->getOperand(1);
4821   EVT VT = N->getValueType(0);
4822   EVT CCVT = getSetCCResultType(VT);
4823   SDLoc DL(N);
4824 
4825   // fold (sdiv c1, c2) -> c1/c2
4826   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4827     return C;
4828 
4829   // fold vector ops
4830   if (VT.isVector())
4831     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4832       return FoldedVOp;
4833 
4834   // fold (sdiv X, -1) -> 0-X
4835   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4836   if (N1C && N1C->isAllOnes())
4837     return DAG.getNegative(N0, DL, VT);
4838 
4839   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4840   if (N1C && N1C->isMinSignedValue())
4841     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4842                          DAG.getConstant(1, DL, VT),
4843                          DAG.getConstant(0, DL, VT));
4844 
4845   if (SDValue V = simplifyDivRem(N, DAG))
4846     return V;
4847 
4848   if (SDValue NewSel = foldBinOpIntoSelect(N))
4849     return NewSel;
4850 
4851   // If we know the sign bits of both operands are zero, strength reduce to a
4852   // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
4853   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4854     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4855 
4856   if (SDValue V = visitSDIVLike(N0, N1, N)) {
4857     // If the corresponding remainder node exists, update its users with
4858     // (Dividend - (Quotient * Divisor)).
4859     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4860                                               { N0, N1 })) {
4861       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4862       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4863       AddToWorklist(Mul.getNode());
4864       AddToWorklist(Sub.getNode());
4865       CombineTo(RemNode, Sub);
4866     }
4867     return V;
4868   }
4869 
4870   // sdiv, srem -> sdivrem
4871   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4872   // true.  Otherwise, we break the simplification logic in visitREM().
4873   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4874   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4875     if (SDValue DivRem = useDivRem(N))
4876         return DivRem;
4877 
4878   return SDValue();
4879 }
4880 
4881 static bool isDivisorPowerOfTwo(SDValue Divisor) {
4882   // Helper for determining whether a value is a power-2 constant scalar or a
4883   // vector of such elements.
4884   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4885     if (C->isZero() || C->isOpaque())
4886       return false;
4887     if (C->getAPIntValue().isPowerOf2())
4888       return true;
4889     if (C->getAPIntValue().isNegatedPowerOf2())
4890       return true;
4891     return false;
4892   };
4893 
4894   return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4895 }
4896 
4897 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4898   SDLoc DL(N);
4899   EVT VT = N->getValueType(0);
4900   EVT CCVT = getSetCCResultType(VT);
4901   unsigned BitWidth = VT.getScalarSizeInBits();
4902 
4903   // fold (sdiv X, pow2) -> simple ops after legalize
4904   // FIXME: We check for the exact bit here because the generic lowering gives
4905   // better results in that case. The target-specific lowering should learn how
4906   // to handle exact sdivs efficiently.
4907   if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4908     // Target-specific implementation of sdiv x, pow2.
4909     if (SDValue Res = BuildSDIVPow2(N))
4910       return Res;
4911 
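    // Illustrative i32 trace for (sdiv X, 8): C1 = cttz(8) = 3 and
    // Inexact = 32 - 3 = 29, so (srl (sra X, 31), 29) yields 7 when X is
    // negative and 0 otherwise; the final (sra (add X, Srl), 3) then rounds
    // the quotient toward zero.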
4912     // Create constants that are functions of the shift amount value.
4913     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4914     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4915     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4916     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4917     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4918     if (!isConstantOrConstantVector(Inexact))
4919       return SDValue();
4920 
4921     // Splat the sign bit into the register
4922     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4923                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4924     AddToWorklist(Sign.getNode());
4925 
4926     // Add (N0 < 0) ? abs2 - 1 : 0;
4927     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4928     AddToWorklist(Srl.getNode());
4929     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4930     AddToWorklist(Add.getNode());
4931     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4932     AddToWorklist(Sra.getNode());
4933 
4934     // Special case: (sdiv X, 1) -> X
4935     // Special case: (sdiv X, -1) -> 0-X
4936     SDValue One = DAG.getConstant(1, DL, VT);
4937     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4938     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4939     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4940     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4941     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4942 
4943     // If dividing by a positive value, we're done. Otherwise, the result must
4944     // be negated.
4945     SDValue Zero = DAG.getConstant(0, DL, VT);
4946     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4947 
4948     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4949     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4950     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4951     return Res;
4952   }
4953 
4954   // If integer divide is expensive and we satisfy the requirements, emit an
4955   // alternate sequence.  Targets may check function attributes for size/speed
4956   // trade-offs.
4957   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4958   if (isConstantOrConstantVector(N1) &&
4959       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4960     if (SDValue Op = BuildSDIV(N))
4961       return Op;
4962 
4963   return SDValue();
4964 }
4965 
4966 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4967   SDValue N0 = N->getOperand(0);
4968   SDValue N1 = N->getOperand(1);
4969   EVT VT = N->getValueType(0);
4970   EVT CCVT = getSetCCResultType(VT);
4971   SDLoc DL(N);
4972 
4973   // fold (udiv c1, c2) -> c1/c2
4974   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4975     return C;
4976 
4977   // fold vector ops
4978   if (VT.isVector())
4979     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4980       return FoldedVOp;
4981 
4982   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4983   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4984   if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4985     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4986                          DAG.getConstant(1, DL, VT),
4987                          DAG.getConstant(0, DL, VT));
4988   }
4989 
4990   if (SDValue V = simplifyDivRem(N, DAG))
4991     return V;
4992 
4993   if (SDValue NewSel = foldBinOpIntoSelect(N))
4994     return NewSel;
4995 
4996   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4997     // If the corresponding remainder node exists, update its users with
4998     // (Dividend - (Quotient * Divisor)).
4999     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5000                                               { N0, N1 })) {
5001       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5002       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5003       AddToWorklist(Mul.getNode());
5004       AddToWorklist(Sub.getNode());
5005       CombineTo(RemNode, Sub);
5006     }
5007     return V;
5008   }
5009 
5010   // udiv, urem -> udivrem
5011   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5012   // true.  Otherwise, we break the simplification logic in visitREM().
5013   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5014   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5015     if (SDValue DivRem = useDivRem(N))
5016         return DivRem;
5017 
5018   // Simplify the operands using demanded-bits information.
5019   // We don't have demanded bits support for UDIV so this just enables constant
5020   // folding based on known bits.
5021   if (SimplifyDemandedBits(SDValue(N, 0)))
5022     return SDValue(N, 0);
5023 
5024   return SDValue();
5025 }
5026 
5027 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5028   SDLoc DL(N);
5029   EVT VT = N->getValueType(0);
5030 
5031   // fold (udiv x, (1 << c)) -> x >>u c
5032   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5033     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5034       AddToWorklist(LogBase2.getNode());
5035 
5036       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5037       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5038       AddToWorklist(Trunc.getNode());
5039       return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5040     }
5041   }
5042 
5043   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5044   if (N1.getOpcode() == ISD::SHL) {
5045     SDValue N10 = N1.getOperand(0);
5046     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5047       if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5048         AddToWorklist(LogBase2.getNode());
5049 
5050         EVT ADDVT = N1.getOperand(1).getValueType();
5051         SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5052         AddToWorklist(Trunc.getNode());
5053         SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5054         AddToWorklist(Add.getNode());
5055         return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5056       }
5057     }
5058   }
5059 
5060   // fold (udiv x, c) -> alternate
5061   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5062   if (isConstantOrConstantVector(N1) &&
5063       !TLI.isIntDivCheap(N->getValueType(0), Attr))
5064     if (SDValue Op = BuildUDIV(N))
5065       return Op;
5066 
5067   return SDValue();
5068 }
5069 
5070 SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5071   if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5072       !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5073     // Target-specific implementation of srem x, pow2.
5074     if (SDValue Res = BuildSREMPow2(N))
5075       return Res;
5076   }
5077   return SDValue();
5078 }
5079 
5080 // handles ISD::SREM and ISD::UREM
5081 SDValue DAGCombiner::visitREM(SDNode *N) {
5082   unsigned Opcode = N->getOpcode();
5083   SDValue N0 = N->getOperand(0);
5084   SDValue N1 = N->getOperand(1);
5085   EVT VT = N->getValueType(0);
5086   EVT CCVT = getSetCCResultType(VT);
5087 
5088   bool isSigned = (Opcode == ISD::SREM);
5089   SDLoc DL(N);
5090 
5091   // fold (rem c1, c2) -> c1%c2
5092   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5093     return C;
5094 
5095   // fold (urem X, -1) -> select(FX == -1, 0, FX)
5096   // Freeze the numerator to avoid a miscompile with an undefined value.
5097   if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5098       CCVT.isVector() == VT.isVector()) {
5099     SDValue F0 = DAG.getFreeze(N0);
5100     SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5101     return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5102   }
5103 
5104   if (SDValue V = simplifyDivRem(N, DAG))
5105     return V;
5106 
5107   if (SDValue NewSel = foldBinOpIntoSelect(N))
5108     return NewSel;
5109 
5110   if (isSigned) {
5111     // If we know the sign bits of both operands are zero, strength reduce to a
5112     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5113     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5114       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5115   } else {
5116     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5117       // fold (urem x, pow2) -> (and x, pow2-1)
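      // For example, (urem x, 8) becomes (and x, 7).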
5118       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5119       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5120       AddToWorklist(Add.getNode());
5121       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5122     }
5123     // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5124     // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5125     // TODO: We should sink the following into isKnownToBePowerOfTwo
5126     // using an OrZero parameter analogous to our handling in ValueTracking.
5127     if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5128         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5129       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5130       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5131       AddToWorklist(Add.getNode());
5132       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5133     }
5134   }
5135 
5136   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5137 
5138   // If X/C can be simplified by the division-by-constant logic, lower
5139   // X%C to the equivalent of X-X/C*C.
5140   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5141   // speculative DIV must not cause a DIVREM conversion.  We guard against this
5142   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
5143   // combine will not return a DIVREM.  Regardless, checking cheapness here
5144   // makes sense since the simplification results in fatter code.
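  // For example, (urem X, 7) becomes X - (X /u 7) * 7, where the udiv is
  // typically lowered in turn to a multiply-by-magic-constant sequence.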
5145   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5146     if (isSigned) {
5147       // Check if we can build a faster implementation for srem.
5148       if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5149         return OptimizedRem;
5150     }
5151 
5152     SDValue OptimizedDiv =
5153         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5154     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5155       // If the equivalent Div node also exists, update its users.
5156       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5157       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5158                                                 { N0, N1 }))
5159         CombineTo(DivNode, OptimizedDiv);
5160       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5161       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5162       AddToWorklist(OptimizedDiv.getNode());
5163       AddToWorklist(Mul.getNode());
5164       return Sub;
5165     }
5166   }
5167 
5168   // sdiv, srem -> sdivrem / udiv, urem -> udivrem
5169   if (SDValue DivRem = useDivRem(N))
5170     return DivRem.getValue(1);
5171 
5172   return SDValue();
5173 }
5174 
5175 SDValue DAGCombiner::visitMULHS(SDNode *N) {
5176   SDValue N0 = N->getOperand(0);
5177   SDValue N1 = N->getOperand(1);
5178   EVT VT = N->getValueType(0);
5179   SDLoc DL(N);
5180 
5181   // fold (mulhs c1, c2)
5182   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5183     return C;
5184 
5185   // canonicalize constant to RHS.
5186   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5187       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5188     return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5189 
5190   if (VT.isVector()) {
5191     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5192       return FoldedVOp;
5193 
5194     // fold (mulhs x, 0) -> 0
5195     // Do not return N1, because it may contain undef elements.
5196     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5197       return DAG.getConstant(0, DL, VT);
5198   }
5199 
5200   // fold (mulhs x, 0) -> 0
5201   if (isNullConstant(N1))
5202     return N1;
5203 
5204   // fold (mulhs x, 1) -> (sra x, size(x)-1)
5205   if (isOneConstant(N1))
5206     return DAG.getNode(
5207         ISD::SRA, DL, VT, N0,
5208         DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5209 
5210   // fold (mulhs x, undef) -> 0
5211   if (N0.isUndef() || N1.isUndef())
5212     return DAG.getConstant(0, DL, VT);
5213 
5214   // If the type twice as wide is legal, transform the mulhs to a wider multiply
5215   // plus a shift.
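  // For example, an i16 MULHS on a target with a legal i32 MUL becomes
  // trunc(srl(mul(sext(x), sext(y)), 16)).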
5216   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5217       !VT.isVector()) {
5218     MVT Simple = VT.getSimpleVT();
5219     unsigned SimpleSize = Simple.getSizeInBits();
5220     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5221     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5222       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5223       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5224       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5225       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5226                        DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5227       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5228     }
5229   }
5230 
5231   return SDValue();
5232 }
5233 
5234 SDValue DAGCombiner::visitMULHU(SDNode *N) {
5235   SDValue N0 = N->getOperand(0);
5236   SDValue N1 = N->getOperand(1);
5237   EVT VT = N->getValueType(0);
5238   SDLoc DL(N);
5239 
5240   // fold (mulhu c1, c2)
5241   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5242     return C;
5243 
5244   // canonicalize constant to RHS.
5245   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5246       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5247     return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5248 
5249   if (VT.isVector()) {
5250     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5251       return FoldedVOp;
5252 
5253     // fold (mulhu x, 0) -> 0
5254     // Do not return N1, because it may contain undef elements.
5255     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5256       return DAG.getConstant(0, DL, VT);
5257   }
5258 
5259   // fold (mulhu x, 0) -> 0
5260   if (isNullConstant(N1))
5261     return N1;
5262 
5263   // fold (mulhu x, 1) -> 0
5264   if (isOneConstant(N1))
5265     return DAG.getConstant(0, DL, VT);
5266 
5267   // fold (mulhu x, undef) -> 0
5268   if (N0.isUndef() || N1.isUndef())
5269     return DAG.getConstant(0, DL, VT);
5270 
5271   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
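  // For example, on i32: (mulhu x, 16) becomes (srl x, 28).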
5272   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5273       hasOperation(ISD::SRL, VT)) {
5274     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5275       unsigned NumEltBits = VT.getScalarSizeInBits();
5276       SDValue SRLAmt = DAG.getNode(
5277           ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5278       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5279       SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5280       return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5281     }
5282   }
5283 
5284   // If the type twice as wide is legal, transform the mulhu to a wider multiply
5285   // plus a shift.
5286   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5287       !VT.isVector()) {
5288     MVT Simple = VT.getSimpleVT();
5289     unsigned SimpleSize = Simple.getSizeInBits();
5290     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5291     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5292       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5293       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5294       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5295       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5296                        DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5297       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5298     }
5299   }
5300 
5301   // Simplify the operands using demanded-bits information.
5302   // We don't have demanded bits support for MULHU so this just enables constant
5303   // folding based on known bits.
5304   if (SimplifyDemandedBits(SDValue(N, 0)))
5305     return SDValue(N, 0);
5306 
5307   return SDValue();
5308 }
5309 
5310 SDValue DAGCombiner::visitAVG(SDNode *N) {
5311   unsigned Opcode = N->getOpcode();
5312   SDValue N0 = N->getOperand(0);
5313   SDValue N1 = N->getOperand(1);
5314   EVT VT = N->getValueType(0);
5315   SDLoc DL(N);
5316   bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5317 
5318   // fold (avg c1, c2)
5319   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5320     return C;
5321 
5322   // canonicalize constant to RHS.
5323   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5324       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5325     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5326 
5327   if (VT.isVector())
5328     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5329       return FoldedVOp;
5330 
5331   // fold (avg x, undef) -> x
5332   if (N0.isUndef())
5333     return N1;
5334   if (N1.isUndef())
5335     return N0;
5336 
5337   // fold (avg x, x) --> x
5338   if (N0 == N1 && Level >= AfterLegalizeTypes)
5339     return N0;
5340 
5341   // fold (avgfloor x, 0) -> x >> 1
5342   SDValue X, Y;
5343   if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5344     return DAG.getNode(ISD::SRA, DL, VT, X,
5345                        DAG.getShiftAmountConstant(1, VT, DL));
5346   if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5347     return DAG.getNode(ISD::SRL, DL, VT, X,
5348                        DAG.getShiftAmountConstant(1, VT, DL));
5349 
5350   // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5351   // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5352   if (!IsSigned &&
5353       sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5354       X.getValueType() == Y.getValueType() &&
5355       hasOperation(Opcode, X.getValueType())) {
5356     SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5357     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5358   }
5359   if (IsSigned &&
5360       sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5361       X.getValueType() == Y.getValueType() &&
5362       hasOperation(Opcode, X.getValueType())) {
5363     SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5364     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5365   }
5366 
5367   // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5368   // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5369   // Check if avgflooru isn't legal/custom but avgceilu is.
5370   if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5371       (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5372     if (DAG.isKnownNeverZero(N1))
5373       return DAG.getNode(
5374           ISD::AVGCEILU, DL, VT, N0,
5375           DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5376     if (DAG.isKnownNeverZero(N0))
5377       return DAG.getNode(
5378           ISD::AVGCEILU, DL, VT, N1,
5379           DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5380   }
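       // This is sound because avgflooru(x, y) = (x + y) >> 1 and
       // avgceilu(x, y) = (x + y + 1) >> 1, both computed without overflow, so
       // avgceilu(x, y - 1) = (x + y) >> 1 whenever y >= 1 (and likewise for x).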
5381 
5382   // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5383   // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5384   if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5385       (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5386     SDValue Add;
5387     if (sd_match(N,
5388                  m_c_BinOp(Opcode,
5389                            m_AllOf(m_Value(Add), m_Add(m_Value(X), m_Value(Y))),
5390                            m_One())) ||
5391         sd_match(N, m_c_BinOp(Opcode,
5392                               m_AllOf(m_Value(Add), m_Add(m_Value(X), m_One())),
5393                               m_Value(Y)))) {
5394 
5395       if (IsSigned && Add->getFlags().hasNoSignedWrap())
5396         return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5397 
5398       if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5399         return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5400     }
5401   }
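       // This is sound because avgfloor(x + y, 1) = (x + y + 1) >> 1, which is
       // exactly avgceil(x, y); the no-wrap flag guarantees x + y did not
       // overflow before the average was taken.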
5402 
5403   return SDValue();
5404 }
5405 
5406 SDValue DAGCombiner::visitABD(SDNode *N) {
5407   unsigned Opcode = N->getOpcode();
5408   SDValue N0 = N->getOperand(0);
5409   SDValue N1 = N->getOperand(1);
5410   EVT VT = N->getValueType(0);
5411   SDLoc DL(N);
5412 
5413   // fold (abd c1, c2)
5414   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5415     return C;
5416 
5417   // canonicalize constant to RHS.
5418   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5419       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5420     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5421 
5422   if (VT.isVector())
5423     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5424       return FoldedVOp;
5425 
5426   // fold (abd x, undef) -> 0
5427   if (N0.isUndef() || N1.isUndef())
5428     return DAG.getConstant(0, DL, VT);
5429 
5430   // fold (abd x, x) -> 0
5431   if (N0 == N1)
5432     return DAG.getConstant(0, DL, VT);
5433 
5434   SDValue X;
5435 
5436   // fold (abds x, 0) -> abs x
5437   if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5438       (!LegalOperations || hasOperation(ISD::ABS, VT)))
5439     return DAG.getNode(ISD::ABS, DL, VT, X);
5440 
5441   // fold (abdu x, 0) -> x
5442   if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5443     return X;
5444 
5445   // fold (abds x, y) -> (abdu x, y) iff both args are known non-negative
5446   if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5447       DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5448     return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
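       // (When both sign bits are zero, the operands are non-negative, so the
       // signed and unsigned absolute differences coincide.)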
5449 
5450   return SDValue();
5451 }
5452 
5453 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5454 /// give the opcodes for the two computations that are being performed. Returns
5455 /// the simplified value on success, or an empty SDValue otherwise.
5456 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5457                                                 unsigned HiOp) {
5458   // If the high half is not needed, just compute the low half.
5459   bool HiExists = N->hasAnyUseOfValue(1);
5460   if (!HiExists && (!LegalOperations ||
5461                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5462     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5463     return CombineTo(N, Res, Res);
5464   }
5465 
5466   // If the low half is not needed, just compute the high half.
5467   bool LoExists = N->hasAnyUseOfValue(0);
5468   if (!LoExists && (!LegalOperations ||
5469                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5470     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5471     return CombineTo(N, Res, Res);
5472   }
5473 
5474   // If both halves are used, return the node as it is.
5475   if (LoExists && HiExists)
5476     return SDValue();
5477 
5478   // If the two computed results can be simplified separately, separate them.
5479   if (LoExists) {
5480     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5481     AddToWorklist(Lo.getNode());
5482     SDValue LoOpt = combine(Lo.getNode());
5483     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5484         (!LegalOperations ||
5485          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5486       return CombineTo(N, LoOpt, LoOpt);
5487   }
5488 
5489   if (HiExists) {
5490     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5491     AddToWorklist(Hi.getNode());
5492     SDValue HiOpt = combine(Hi.getNode());
5493     if (HiOpt.getNode() && HiOpt != Hi &&
5494         (!LegalOperations ||
5495          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5496       return CombineTo(N, HiOpt, HiOpt);
5497   }
5498 
5499   return SDValue();
5500 }
5501 
5502 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5503   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5504     return Res;
5505 
5506   SDValue N0 = N->getOperand(0);
5507   SDValue N1 = N->getOperand(1);
5508   EVT VT = N->getValueType(0);
5509   SDLoc DL(N);
5510 
5511   // Constant fold.
5512   if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5513     return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5514 
5515   // canonicalize constant to RHS (vector doesn't have to splat)
5516   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5517       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5518     return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5519 
5520   // If the type twice as wide is legal, transform the smul_lohi to a wider
5521   // multiply plus a shift.
5522   if (VT.isSimple() && !VT.isVector()) {
5523     MVT Simple = VT.getSimpleVT();
5524     unsigned SimpleSize = Simple.getSizeInBits();
5525     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5526     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5527       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5528       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5529       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5530       // Compute the high part (result 1).
5531       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5532                        DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5533       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5534       // Compute the low part (result 0).
5535       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5536       return CombineTo(N, Lo, Hi);
5537     }
5538   }
5539 
5540   return SDValue();
5541 }
5542 
5543 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5544   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5545     return Res;
5546 
5547   SDValue N0 = N->getOperand(0);
5548   SDValue N1 = N->getOperand(1);
5549   EVT VT = N->getValueType(0);
5550   SDLoc DL(N);
5551 
5552   // Constant fold.
5553   if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5554     return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5555 
5556   // canonicalize constant to RHS (vector doesn't have to splat)
5557   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5558       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5559     return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5560 
5561   // (umul_lohi N0, 0) -> (0, 0)
5562   if (isNullConstant(N1)) {
5563     SDValue Zero = DAG.getConstant(0, DL, VT);
5564     return CombineTo(N, Zero, Zero);
5565   }
5566 
5567   // (umul_lohi N0, 1) -> (N0, 0)
5568   if (isOneConstant(N1)) {
5569     SDValue Zero = DAG.getConstant(0, DL, VT);
5570     return CombineTo(N, N0, Zero);
5571   }
5572 
5573   // If the type twice as wide is legal, transform the umul_lohi to a wider
5574   // multiply plus a shift.
5575   if (VT.isSimple() && !VT.isVector()) {
5576     MVT Simple = VT.getSimpleVT();
5577     unsigned SimpleSize = Simple.getSizeInBits();
5578     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5579     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5580       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5581       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5582       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5583       // Compute the high part (result 1).
5584       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5585                        DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5586       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5587       // Compute the low part (result 0).
5588       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5589       return CombineTo(N, Lo, Hi);
5590     }
5591   }
5592 
5593   return SDValue();
5594 }
5595 
5596 SDValue DAGCombiner::visitMULO(SDNode *N) {
5597   SDValue N0 = N->getOperand(0);
5598   SDValue N1 = N->getOperand(1);
5599   EVT VT = N0.getValueType();
5600   bool IsSigned = (ISD::SMULO == N->getOpcode());
5601 
5602   EVT CarryVT = N->getValueType(1);
5603   SDLoc DL(N);
5604 
5605   ConstantSDNode *N0C = isConstOrConstSplat(N0);
5606   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5607 
5608   // fold operation with constant operands.
5609   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5610   // multiple results.
5611   if (N0C && N1C) {
5612     bool Overflow;
5613     APInt Result =
5614         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5615                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5616     return CombineTo(N, DAG.getConstant(Result, DL, VT),
5617                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5618   }
5619 
5620   // canonicalize constant to RHS.
5621   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5622       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5623     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5624 
5625   // fold (mulo x, 0) -> 0 + no carry out
5626   if (isNullOrNullSplat(N1))
5627     return CombineTo(N, DAG.getConstant(0, DL, VT),
5628                      DAG.getConstant(0, DL, CarryVT));
5629 
5630   // (mulo x, 2) -> (addo x, x)
5631   // FIXME: This needs a freeze.
5632   if (N1C && N1C->getAPIntValue() == 2 &&
5633       (!IsSigned || VT.getScalarSizeInBits() > 2))
5634     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5635                        N->getVTList(), N0, N0);
5636 
5637   // A 1 bit SMULO overflows if both inputs are 1.
5638   if (IsSigned && VT.getScalarSizeInBits() == 1) {
5639     SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5640     SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5641                                DAG.getConstant(0, DL, VT), ISD::SETNE);
5642     return CombineTo(N, And, Cmp);
5643   }
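       // (In i1 the only values are 0 and -1, and (-1 * -1) = +1 is not
       // representable, so the multiply overflows exactly when both inputs
       // are -1; the low result bit is simply x & y.)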
5644 
5645   // If it cannot overflow, transform into a mul.
5646   if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5647     return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5648                      DAG.getConstant(0, DL, CarryVT));
5649   return SDValue();
5650 }
5651 
5652 // Function to calculate whether the Min/Max pair of SDNodes (potentially
5653 // swapped around) makes a saturate pattern, clamping to between a signed
5654 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5655 // Returns the node being clamped and the bitwidth of the clamp in BW. Should
5656 // work with both SMIN/SMAX nodes and the setcc/select combo. The operands are the
5657 // same as SimplifySelectCC. N0<N1 ? N2 : N3.
5658 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5659                                   SDValue N3, ISD::CondCode CC, unsigned &BW,
5660                                   bool &Unsigned, SelectionDAG &DAG) {
5661   auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5662                             ISD::CondCode CC) {
5663     // The compare and select operand should be the same or the select operands
5664     // should be truncated versions of the comparison.
5665     if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5666       return 0;
5667     // The constants need to be the same or a truncated version of each other.
5668     ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5669     ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5670     if (!N1C || !N3C)
5671       return 0;
5672     const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5673     const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5674     if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5675       return 0;
5676     return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5677   };
5678 
5679   // Check the initial value is a SMIN/SMAX equivalent.
5680   unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5681   if (!Opcode0)
5682     return SDValue();
5683 
5684   // We may need only one range check if the fptosi can never produce
5685   // the upper value.
5686   if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5687     if (isNullOrNullSplat(N3)) {
5688       EVT IntVT = N0.getValueType().getScalarType();
5689       EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5690       if (FPVT.isSimple()) {
5691         Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5692         const fltSemantics &Semantics = InputTy->getFltSemantics();
5693         uint32_t MinBitWidth =
5694           APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5695         if (IntVT.getSizeInBits() >= MinBitWidth) {
5696           Unsigned = true;
5697           BW = PowerOf2Ceil(MinBitWidth);
5698           return N0;
5699         }
5700       }
5701     }
5702   }
5703 
5704   SDValue N00, N01, N02, N03;
5705   ISD::CondCode N0CC;
5706   switch (N0.getOpcode()) {
5707   case ISD::SMIN:
5708   case ISD::SMAX:
5709     N00 = N02 = N0.getOperand(0);
5710     N01 = N03 = N0.getOperand(1);
5711     N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5712     break;
5713   case ISD::SELECT_CC:
5714     N00 = N0.getOperand(0);
5715     N01 = N0.getOperand(1);
5716     N02 = N0.getOperand(2);
5717     N03 = N0.getOperand(3);
5718     N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5719     break;
5720   case ISD::SELECT:
5721   case ISD::VSELECT:
5722     if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5723       return SDValue();
5724     N00 = N0.getOperand(0).getOperand(0);
5725     N01 = N0.getOperand(0).getOperand(1);
5726     N02 = N0.getOperand(1);
5727     N03 = N0.getOperand(2);
5728     N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5729     break;
5730   default:
5731     return SDValue();
5732   }
5733 
5734   unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5735   if (!Opcode1 || Opcode0 == Opcode1)
5736     return SDValue();
5737 
5738   ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5739   ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5740   if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5741     return SDValue();
5742 
5743   const APInt &MinC = MinCOp->getAPIntValue();
5744   const APInt &MaxC = MaxCOp->getAPIntValue();
5745   APInt MinCPlus1 = MinC + 1;
5746   if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5747     BW = MinCPlus1.exactLogBase2() + 1;
5748     Unsigned = false;
5749     return N02;
5750   }
5751 
5752   if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5753     BW = MinCPlus1.exactLogBase2();
5754     Unsigned = true;
5755     return N02;
5756   }
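       // Worked example: smin(smax(x, -128), 127) has MaxC = -128 and
       // MinC = 127, so MinC + 1 = 128 = 2^7 = -MaxC and BW = 8 (a signed i8
       // saturate); smin(smax(x, 0), 255) has MaxC = 0 and MinC + 1 = 2^8, so
       // BW = 8 with Unsigned = true (an unsigned i8 saturate).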
5757 
5758   return SDValue();
5759 }
5760 
5761 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5762                                            SDValue N3, ISD::CondCode CC,
5763                                            SelectionDAG &DAG) {
5764   unsigned BW;
5765   bool Unsigned;
5766   SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5767   if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5768     return SDValue();
5769   EVT FPVT = Fp.getOperand(0).getValueType();
5770   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5771   if (FPVT.isVector())
5772     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5773                              FPVT.getVectorElementCount());
5774   unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5775   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5776     return SDValue();
5777   SDLoc DL(Fp);
5778   SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5779                             DAG.getValueType(NewVT.getScalarType()));
5780   return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5781 }
5782 
5783 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5784                                          SDValue N3, ISD::CondCode CC,
5785                                          SelectionDAG &DAG) {
5786   // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5787   // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
5788   // be truncated versions of the setcc (N0/N1).
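       // For example, with N0 = (fptoui float X to i32), N1 = N3 = 255 and
       // SETULT, this is umin(fptoui X, 255): BW = log2(255 + 1) = 8, so the
       // clamp can become (zext (fp_to_uint_sat X to i8) to i32) if the target
       // says such a conversion is worthwhile.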
5789   if ((N0 != N2 &&
5790        (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5791       N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5792     return SDValue();
5793   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5794   ConstantSDNode *N3C = isConstOrConstSplat(N3);
5795   if (!N1C || !N3C)
5796     return SDValue();
5797   const APInt &C1 = N1C->getAPIntValue();
5798   const APInt &C3 = N3C->getAPIntValue();
5799   if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5800       C1 != C3.zext(C1.getBitWidth()))
5801     return SDValue();
5802 
5803   unsigned BW = (C1 + 1).exactLogBase2();
5804   EVT FPVT = N0.getOperand(0).getValueType();
5805   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5806   if (FPVT.isVector())
5807     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5808                              FPVT.getVectorElementCount());
5809   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5810                                                         FPVT, NewVT))
5811     return SDValue();
5812 
5813   SDValue Sat =
5814       DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5815                   DAG.getValueType(NewVT.getScalarType()));
5816   return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5817 }
5818 
5819 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5820   SDValue N0 = N->getOperand(0);
5821   SDValue N1 = N->getOperand(1);
5822   EVT VT = N0.getValueType();
5823   unsigned Opcode = N->getOpcode();
5824   SDLoc DL(N);
5825 
5826   // fold operation with constant operands.
5827   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5828     return C;
5829 
5830   // If the operands are the same, this is a no-op.
5831   if (N0 == N1)
5832     return N0;
5833 
5834   // canonicalize constant to RHS
5835   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5836       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5837     return DAG.getNode(Opcode, DL, VT, N1, N0);
5838 
5839   // fold vector ops
5840   if (VT.isVector())
5841     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5842       return FoldedVOp;
5843 
5844   // reassociate minmax
5845   if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5846     return RMINMAX;
5847 
5848   // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5849   // Only do this if:
5850   // 1. The current op isn't legal and the flipped is.
5851   // 2. The saturation pattern is broken by canonicalization in InstCombine.
5852   bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5853   bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5854   if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5855       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5856     unsigned AltOpcode;
5857     switch (Opcode) {
5858     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5859     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5860     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5861     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5862     default: llvm_unreachable("Unknown MINMAX opcode");
5863     }
5864     if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5865       return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5866   }
5867 
5868   if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5869     if (SDValue S = PerformMinMaxFpToSatCombine(
5870             N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5871       return S;
5872   if (Opcode == ISD::UMIN)
5873     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5874       return S;
5875 
5876   // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5877   auto ReductionOpcode = [](unsigned Opcode) {
5878     switch (Opcode) {
5879     case ISD::SMIN:
5880       return ISD::VECREDUCE_SMIN;
5881     case ISD::SMAX:
5882       return ISD::VECREDUCE_SMAX;
5883     case ISD::UMIN:
5884       return ISD::VECREDUCE_UMIN;
5885     case ISD::UMAX:
5886       return ISD::VECREDUCE_UMAX;
5887     default:
5888       llvm_unreachable("Unexpected opcode");
5889     }
5890   };
5891   if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5892                                         SDLoc(N), VT, N0, N1))
5893     return SD;
5894 
5895   // Simplify the operands using demanded-bits information.
5896   if (SimplifyDemandedBits(SDValue(N, 0)))
5897     return SDValue(N, 0);
5898 
5899   return SDValue();
5900 }
5901 
5902 /// If this is a bitwise logic instruction and both operands have the same
5903 /// opcode, try to sink the other opcode after the logic instruction.
5904 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5905   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5906   EVT VT = N0.getValueType();
5907   unsigned LogicOpcode = N->getOpcode();
5908   unsigned HandOpcode = N0.getOpcode();
5909   assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5910   assert(HandOpcode == N1.getOpcode() && "Bad input!");
5911 
5912   // Bail early if none of these transforms apply.
5913   if (N0.getNumOperands() == 0)
5914     return SDValue();
5915 
5916   // FIXME: We should check number of uses of the operands to not increase
5917   //        the instruction count for all transforms.
5918 
5919   // Handle size-changing casts (or sign_extend_inreg).
5920   SDValue X = N0.getOperand(0);
5921   SDValue Y = N1.getOperand(0);
5922   EVT XVT = X.getValueType();
5923   SDLoc DL(N);
5924   if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5925       (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5926        N0.getOperand(1) == N1.getOperand(1))) {
5927     // If both operands have other uses, this transform would create extra
5928     // instructions without eliminating anything.
5929     if (!N0.hasOneUse() && !N1.hasOneUse())
5930       return SDValue();
5931     // We need matching integer source types.
5932     if (XVT != Y.getValueType())
5933       return SDValue();
5934     // Don't create an illegal op during or after legalization. Don't ever
5935     // create an unsupported vector op.
5936     if ((VT.isVector() || LegalOperations) &&
5937         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5938       return SDValue();
5939     // Avoid infinite looping with PromoteIntBinOp.
5940     // TODO: Should we apply desirable/legal constraints to all opcodes?
5941     if ((HandOpcode == ISD::ANY_EXTEND ||
5942          HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5943         LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5944       return SDValue();
5945     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5946     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5947     if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5948       return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5949     return DAG.getNode(HandOpcode, DL, VT, Logic);
5950   }
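       // For example: and (zext i8 x to i32), (zext i8 y to i32)
       //          --> zext (and x, y) to i32
       // performing the logic op in the narrower source type.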
5951 
5952   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5953   if (HandOpcode == ISD::TRUNCATE) {
5954     // If both operands have other uses, this transform would create extra
5955     // instructions without eliminating anything.
5956     if (!N0.hasOneUse() && !N1.hasOneUse())
5957       return SDValue();
5958     // We need matching source types.
5959     if (XVT != Y.getValueType())
5960       return SDValue();
5961     // Don't create an illegal op during or after legalization.
5962     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5963       return SDValue();
5964     // Be extra careful sinking truncate. If it's free, there's no benefit in
5965     // widening a binop. Also, don't create a logic op on an illegal type.
5966     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5967       return SDValue();
5968     if (!TLI.isTypeLegal(XVT))
5969       return SDValue();
5970     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5971     return DAG.getNode(HandOpcode, DL, VT, Logic);
5972   }
5973 
5974   // For binops SHL/SRL/SRA/AND:
5975   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5976   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5977        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5978       N0.getOperand(1) == N1.getOperand(1)) {
5979     // If either operand has other uses, this transform is not an improvement.
5980     if (!N0.hasOneUse() || !N1.hasOneUse())
5981       return SDValue();
5982     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5983     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5984   }
5985 
5986   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5987   if (HandOpcode == ISD::BSWAP) {
5988     // If either operand has other uses, this transform is not an improvement.
5989     if (!N0.hasOneUse() || !N1.hasOneUse())
5990       return SDValue();
5991     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5992     return DAG.getNode(HandOpcode, DL, VT, Logic);
5993   }
5994 
5995   // For funnel shifts FSHL/FSHR:
5996   // logic_op (OP x, x1, s), (OP y, y1, s) -->
5997   //   OP (logic_op x, y), (logic_op x1, y1), s
5998   if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5999       N0.getOperand(2) == N1.getOperand(2)) {
6000     if (!N0.hasOneUse() || !N1.hasOneUse())
6001       return SDValue();
6002     SDValue X1 = N0.getOperand(1);
6003     SDValue Y1 = N1.getOperand(1);
6004     SDValue S = N0.getOperand(2);
6005     SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6006     SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6007     return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6008   }
6009 
6010   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6011   // Only perform this optimization up until type legalization, before
6012   // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6013   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6014   // we don't want to undo this promotion.
6015   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6016   // on scalars.
6017   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6018        Level <= AfterLegalizeTypes) {
6019     // Input types must be integer and the same.
6020     if (XVT.isInteger() && XVT == Y.getValueType() &&
6021         !(VT.isVector() && TLI.isTypeLegal(VT) &&
6022           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6023       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6024       return DAG.getNode(HandOpcode, DL, VT, Logic);
6025     }
6026   }
6027 
6028   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6029   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6030   // If both shuffles use the same mask, and both shuffle within a single
6031   // vector, then it is worthwhile to move the swizzle after the operation.
6032   // The type-legalizer generates this pattern when loading illegal
6033   // vector types from memory. In many cases this allows additional shuffle
6034   // optimizations.
6035   // There are other cases where moving the shuffle after the xor/and/or
6036   // is profitable even if shuffles don't perform a swizzle.
6037   // If both shuffles use the same mask, and both shuffles have the same first
6038   // or second operand, then it might still be profitable to move the shuffle
6039   // after the xor/and/or operation.
6040   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6041     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6042     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6043     assert(X.getValueType() == Y.getValueType() &&
6044            "Inputs to shuffles are not the same type");
6045 
6046     // Check that both shuffles use the same mask. The masks are known to be of
6047     // the same length because the result vector type is the same.
6048     // Check also that shuffles have only one use to avoid introducing extra
6049     // instructions.
6050     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6051         !SVN0->getMask().equals(SVN1->getMask()))
6052       return SDValue();
6053 
6054     // Don't try to fold this node if it requires introducing a
6055     // build vector of all zeros that might be illegal at this stage.
6056     SDValue ShOp = N0.getOperand(1);
6057     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6058       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6059 
6060     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6061     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6062       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6063                                   N0.getOperand(0), N1.getOperand(0));
6064       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6065     }
6066 
6067     // Don't try to fold this node if it requires introducing a
6068     // build vector of all zeros that might be illegal at this stage.
6069     ShOp = N0.getOperand(0);
6070     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6071       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6072 
6073     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6074     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6075       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6076                                   N1.getOperand(1));
6077       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6078     }
6079   }
6080 
6081   return SDValue();
6082 }
6083 
6084 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6085 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6086                                        const SDLoc &DL) {
6087   SDValue LL, LR, RL, RR, N0CC, N1CC;
6088   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6089       !isSetCCEquivalent(N1, RL, RR, N1CC))
6090     return SDValue();
6091 
6092   assert(N0.getValueType() == N1.getValueType() &&
6093          "Unexpected operand types for bitwise logic op");
6094   assert(LL.getValueType() == LR.getValueType() &&
6095          RL.getValueType() == RR.getValueType() &&
6096          "Unexpected operand types for setcc");
6097 
6098   // If we're here post-legalization or the logic op type is not i1, the logic
6099   // op type must match a setcc result type. Also, all folds require new
6100   // operations on the left and right operands, so those types must match.
6101   EVT VT = N0.getValueType();
6102   EVT OpVT = LL.getValueType();
6103   if (LegalOperations || VT.getScalarType() != MVT::i1)
6104     if (VT != getSetCCResultType(OpVT))
6105       return SDValue();
6106   if (OpVT != RL.getValueType())
6107     return SDValue();
6108 
6109   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6110   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6111   bool IsInteger = OpVT.isInteger();
6112   if (LR == RR && CC0 == CC1 && IsInteger) {
6113     bool IsZero = isNullOrNullSplat(LR);
6114     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6115 
6116     // All bits clear?
6117     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6118     // All sign bits clear?
6119     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6120     // Any bits set?
6121     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6122     // Any sign bits set?
6123     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6124 
6125     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
6126     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6127     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
6128     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
6129     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6130       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6131       AddToWorklist(Or.getNode());
6132       return DAG.getSetCC(DL, VT, Or, LR, CC1);
6133     }
6134 
6135     // All bits set?
6136     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6137     // All sign bits set?
6138     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6139     // Any bits clear?
6140     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6141     // Any sign bits clear?
6142     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6143 
6144     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6145     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
6146     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6147   // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
6148     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6149       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6150       AddToWorklist(And.getNode());
6151       return DAG.getSetCC(DL, VT, And, LR, CC1);
6152     }
6153   }
6154 
6155   // TODO: What is the 'or' equivalent of this fold?
6156   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6157   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6158       IsInteger && CC0 == ISD::SETNE &&
6159       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6160        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6161     SDValue One = DAG.getConstant(1, DL, OpVT);
6162     SDValue Two = DAG.getConstant(2, DL, OpVT);
6163     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6164     AddToWorklist(Add.getNode());
6165     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6166   }
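       // (The add maps X == 0 to 1 and X == -1 to 0, so X is neither 0 nor -1
       // exactly when X + 1 is unsigned-greater-or-equal to 2.)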
6167 
6168   // Try more general transforms if the predicates match and the only user of
6169   // the compares is the 'and' or 'or'.
6170   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6171       N0.hasOneUse() && N1.hasOneUse()) {
6172     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6173     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6174     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6175       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6176       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6177       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6178       SDValue Zero = DAG.getConstant(0, DL, OpVT);
6179       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6180     }
6181 
6182     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6183     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6184       // Match a shared variable operand and 2 non-opaque constant operands.
6185       auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6186         // The difference of the constants must be a single bit.
6187         const APInt &CMax =
6188             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6189         const APInt &CMin =
6190             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6191         return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6192       };
6193       if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6194         // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6195         // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6196         SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6197         SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6198         SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6199         SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6200         SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6201         SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6202         SDValue Zero = DAG.getConstant(0, DL, OpVT);
6203         return DAG.getSetCC(DL, VT, And, Zero, CC0);
6204       }
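           // Worked example: (and (setne X, 4), (setne X, 6)) has CMax = 6,
           // CMin = 4, Diff = 2, giving ((X - 4) & ~2) != 0, which is false
           // exactly for X in {4, 6}.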
6205     }
6206   }
6207 
6208   // Canonicalize equivalent operands to LL == RL.
6209   if (LL == RR && LR == RL) {
6210     CC1 = ISD::getSetCCSwappedOperands(CC1);
6211     std::swap(RL, RR);
6212   }
6213 
6214   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6215   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6216   if (LL == RL && LR == RR) {
6217     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6218                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6219     if (NewCC != ISD::SETCC_INVALID &&
6220         (!LegalOperations ||
6221          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6222           TLI.isOperationLegal(ISD::SETCC, OpVT))))
6223       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6224   }
6225 
6226   return SDValue();
6227 }
6228 
6229 static bool areBothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6230                                    SelectionDAG &DAG) {
6231   return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6232 }
6233 
6234 static bool areBothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6235                                   SelectionDAG &DAG) {
6236   return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6237 }
6238 
6239 // FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6240 static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6241                                      ISD::CondCode CC, unsigned OrAndOpcode,
6242                                      SelectionDAG &DAG,
6243                                      bool isFMAXNUMFMINNUM_IEEE,
6244                                      bool isFMAXNUMFMINNUM) {
6245   // The optimization cannot be applied for all the predicates because
6246   // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6247   // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6248   // applied at all if one of the operands is a signaling NaN.
6249 
6250   // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6251   // are non NaN values.
6252   if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6253       ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6254     return areBothOperandsNotNan(Operand1, Operand2, DAG) &&
6255                    isFMAXNUMFMINNUM_IEEE
6256                ? ISD::FMINNUM_IEEE
6257                : ISD::DELETED_NODE;
6258   else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6259             (OrAndOpcode == ISD::OR)) ||
6260            ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6261             (OrAndOpcode == ISD::AND)))
6262     return areBothOperandsNotNan(Operand1, Operand2, DAG) &&
6263                    isFMAXNUMFMINNUM_IEEE
6264                ? ISD::FMAXNUM_IEEE
6265                : ISD::DELETED_NODE;
6266   // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6267   // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6268   // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6269   // that there are not any sNaNs, then the optimization is not valid
6270   // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6271   // the optimization using FMINNUM/FMAXNUM for the following cases. If
6272   // we can prove that we do not have any sNaNs, then we can do the
6273   // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6274   // cases.
6275   else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6276             (OrAndOpcode == ISD::OR)) ||
6277            ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6278             (OrAndOpcode == ISD::AND)))
6279     return isFMAXNUMFMINNUM ? ISD::FMINNUM
6280                             : areBothOperandsNotSNan(Operand1, Operand2, DAG) &&
6281                                       isFMAXNUMFMINNUM_IEEE
6282                                   ? ISD::FMINNUM_IEEE
6283                                   : ISD::DELETED_NODE;
6284   else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6285             (OrAndOpcode == ISD::OR)) ||
6286            ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6287             (OrAndOpcode == ISD::AND)))
6288     return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6289                             : areBothOperandsNotSNan(Operand1, Operand2, DAG) &&
6290                                       isFMAXNUMFMINNUM_IEEE
6291                                   ? ISD::FMAXNUM_IEEE
6292                                   : ISD::DELETED_NODE;
6293   return ISD::DELETED_NODE;
6294 }
6295 
6296 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6297   using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6298   assert(
6299       (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6300       "Invalid Op to combine SETCC with");
6301 
6302   // TODO: Search past casts/truncates.
6303   SDValue LHS = LogicOp->getOperand(0);
6304   SDValue RHS = LogicOp->getOperand(1);
6305   if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6306       !LHS->hasOneUse() || !RHS->hasOneUse())
6307     return SDValue();
6308 
6309   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6310   AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6311       LogicOp, LHS.getNode(), RHS.getNode());
6312 
6313   SDValue LHS0 = LHS->getOperand(0);
6314   SDValue RHS0 = RHS->getOperand(0);
6315   SDValue LHS1 = LHS->getOperand(1);
6316   SDValue RHS1 = RHS->getOperand(1);
6317   // TODO: We don't actually need a splat here; for vectors we just need the
6318   // invariants to hold for each element.
6319   auto *LHS1C = isConstOrConstSplat(LHS1);
6320   auto *RHS1C = isConstOrConstSplat(RHS1);
6321   ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6322   ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6323   EVT VT = LogicOp->getValueType(0);
6324   EVT OpVT = LHS0.getValueType();
6325   SDLoc DL(LogicOp);
6326 
6327   // Check if the operands of an and/or operation are comparisons and if they
6328   // compare against the same value. Replace the and/or-cmp-cmp sequence with
6329   // a min/max-cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6330   // sequence will be replaced with a min-cmp sequence:
6331   // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6332   // and the and-cmp-cmp sequence will be replaced with a max-cmp sequence:
6333   // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6334   // The optimization does not work for `==` or `!=`.
6335   // The two comparisons should have either the same predicate or the
6336   // predicate of one of the comparisons is the opposite of the other one.
6337   bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6338                                TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6339   bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6340                           TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6341   if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6342         TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6343         TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6344         TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6345        (OpVT.isFloatingPoint() &&
6346         (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6347       !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6348       CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6349       CCL != ISD::SETTRUE &&
6350       (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6351 
6352     SDValue CommonValue, Operand1, Operand2;
6353     ISD::CondCode CC = ISD::SETCC_INVALID;
6354     if (CCL == CCR) {
6355       if (LHS0 == RHS0) {
6356         CommonValue = LHS0;
6357         Operand1 = LHS1;
6358         Operand2 = RHS1;
6359         CC = ISD::getSetCCSwappedOperands(CCL);
6360       } else if (LHS1 == RHS1) {
6361         CommonValue = LHS1;
6362         Operand1 = LHS0;
6363         Operand2 = RHS0;
6364         CC = CCL;
6365       }
6366     } else {
6367       assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6368       if (LHS0 == RHS1) {
6369         CommonValue = LHS0;
6370         Operand1 = LHS1;
6371         Operand2 = RHS0;
6372         CC = CCR;
6373       } else if (RHS0 == LHS1) {
6374         CommonValue = LHS1;
6375         Operand1 = LHS0;
6376         Operand2 = RHS1;
6377         CC = CCL;
6378       }
6379     }
6380 
6381     // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6382     // handle it using OR/AND.
6383     if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6384       CC = ISD::SETCC_INVALID;
6385     else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6386       CC = ISD::SETCC_INVALID;
6387 
6388     if (CC != ISD::SETCC_INVALID) {
6389       unsigned NewOpcode = ISD::DELETED_NODE;
6390       bool IsSigned = isSignedIntSetCC(CC);
6391       if (OpVT.isInteger()) {
6392         bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6393                        CC == ISD::SETLT || CC == ISD::SETULT);
6394         bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6395         if (IsLess == IsOr)
6396           NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6397         else
6398           NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6399       } else if (OpVT.isFloatingPoint())
6400         NewOpcode =
6401             getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6402                                  DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6403 
6404       if (NewOpcode != ISD::DELETED_NODE) {
6405         SDValue MinMaxValue =
6406             DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6407         return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6408       }
6409     }
6410   }
6411 
6412   if (TargetPreference == AndOrSETCCFoldKind::None)
6413     return SDValue();
6414 
6415   if (CCL == CCR &&
6416       CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6417       LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6418     const APInt &APLhs = LHS1C->getAPIntValue();
6419     const APInt &APRhs = RHS1C->getAPIntValue();
6420 
6421     // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6422     // case this is just a compare).
6423     if (APLhs == (-APRhs) &&
6424         ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6425          DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6426       const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6427       // (icmp eq A, C) | (icmp eq A, -C)
6428       //    -> (icmp eq Abs(A), C)
6429       // (icmp ne A, C) & (icmp ne A, -C)
6430       //    -> (icmp ne Abs(A), C)
6431       SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6432       return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6433                          DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6434     } else if (TargetPreference &
6435                (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6436 
6437       // AndOrSETCCFoldKind::AddAnd:
6438       // A == C0 | A == C1
6439       //  IF IsPow2(smax(C0, C1)-smin(C0, C1))
6440       //    -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6441       // A != C0 & A != C1
6442       //  IF IsPow2(smax(C0, C1)-smin(C0, C1))
6443       //    -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6444 
6445       // AndOrSETCCFoldKind::NotAnd:
6446       // A == C0 | A == C1
6447       //  IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6448       //    -> ~A & smin(C0, C1) == 0
6449       // A != C0 & A != C1
6450       //  IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6451       //    -> ~A & smin(C0, C1) != 0
6452 
6453       const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6454       const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6455       APInt Dif = MaxC - MinC;
6456       if (!Dif.isZero() && Dif.isPowerOf2()) {
6457         if (MaxC.isAllOnes() &&
6458             (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6459           SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6460           SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6461                                       DAG.getConstant(MinC, DL, OpVT));
6462           return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6463                              DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6464         } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6465 
6466           SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6467                                       DAG.getConstant(-MinC, DL, OpVT));
6468           SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6469                                       DAG.getConstant(~Dif, DL, OpVT));
6470           return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6471                              DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6472         }
6473       }
6474     }
6475   }
6476 
6477   return SDValue();
6478 }
6479 
6480 // Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6481 // We canonicalize to the `select` form in the middle end, but the `and` form
6482 // gets better codegen on all tested targets (arm, x86, riscv).
6483 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6484                                      const SDLoc &DL, SelectionDAG &DAG) {
6485   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6486   if (!isNullConstant(F))
6487     return SDValue();
6488 
6489   EVT CondVT = Cond.getValueType();
6490   if (TLI.getBooleanContents(CondVT) !=
6491       TargetLoweringBase::ZeroOrOneBooleanContent)
6492     return SDValue();
6493 
6494   if (T.getOpcode() != ISD::AND)
6495     return SDValue();
6496 
6497   if (!isOneConstant(T.getOperand(1)))
6498     return SDValue();
6499 
6500   EVT OpVT = T.getValueType();
6501 
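       // Cond is known to be 0 or 1 here, so (zext Cond) & X yields X & 1 when
       // the condition is true and 0 otherwise, matching the select.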
6502   SDValue CondMask =
6503       OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6504   return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6505 }
6506 
6507 /// This contains all DAGCombine rules which reduce two values combined by
6508 /// an And operation to a single value. This makes them reusable in the context
6509 /// of visitSELECT(). Rules involving constants are not included as
6510 /// visitSELECT() already handles those cases.
6511 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6512   EVT VT = N1.getValueType();
6513   SDLoc DL(N);
6514 
6515   // fold (and x, undef) -> 0
6516   if (N0.isUndef() || N1.isUndef())
6517     return DAG.getConstant(0, DL, VT);
6518 
6519   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6520     return V;
6521 
6522   // Canonicalize:
6523   //   and(x, add) -> and(add, x)
6524   if (N1.getOpcode() == ISD::ADD)
6525     std::swap(N0, N1);
6526 
6527   // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6528   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6529       VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6530     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6531       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6532         // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
6533         // immediate for an add but becomes one once its top c2 bits are set,
6534         // transform the ADD so the immediate doesn't need to be materialized
6535         // in a register.
6536         APInt ADDC = ADDI->getAPIntValue();
6537         APInt SRLC = SRLI->getAPIntValue();
6538         if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6539             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6540           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6541                                              SRLC.getZExtValue());
6542           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6543             ADDC |= Mask;
6544             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6545               SDLoc DL0(N0);
6546               SDValue NewAdd =
6547                 DAG.getNode(ISD::ADD, DL0, VT,
6548                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6549               CombineTo(N0.getNode(), NewAdd);
6550               // Return N so it doesn't get rechecked!
6551               return SDValue(N, 0);
6552             }
6553           }
6554         }
6555       }
6556     }
6557   }
6558 
6559   return SDValue();
6560 }
6561 
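     // Check whether the AND mask selects a low-bit prefix of the loaded
     // value, e.g. (and (load i32 p), 0xFF), which can then be turned into
     // (zextload i8 p) if that extending load is legal and profitable.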
6562 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6563                                    EVT LoadResultTy, EVT &ExtVT) {
6564   if (!AndC->getAPIntValue().isMask())
6565     return false;
6566 
6567   unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6568 
6569   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6570   EVT LoadedVT = LoadN->getMemoryVT();
6571 
6572   if (ExtVT == LoadedVT &&
6573       (!LegalOperations ||
6574        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6575     // ZEXTLOAD will match without needing to change the size of the value being
6576     // loaded.
6577     return true;
6578   }
6579 
6580   // Do not change the width of a volatile or atomic load.
6581   if (!LoadN->isSimple())
6582     return false;
6583 
6584   // Do not generate loads of non-round integer types since these can
6585   // be expensive (and would be wrong if the type is not byte sized).
6586   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6587     return false;
6588 
6589   if (LegalOperations &&
6590       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6591     return false;
6592 
6593   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6594     return false;
6595 
6596   return true;
6597 }
6598 
6599 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6600                                     ISD::LoadExtType ExtType, EVT &MemVT,
6601                                     unsigned ShAmt) {
6602   if (!LDST)
6603     return false;
6604   // Only allow byte offsets.
6605   if (ShAmt % 8)
6606     return false;
6607 
6608   // Do not generate loads of non-round integer types since these can
6609   // be expensive (and would be wrong if the type is not byte sized).
6610   if (!MemVT.isRound())
6611     return false;
6612 
6613   // Don't change the width of a volatile or atomic load.
6614   if (!LDST->isSimple())
6615     return false;
6616 
6617   EVT LdStMemVT = LDST->getMemoryVT();
6618 
6619   // Bail out when changing the scalable property, since we can't be sure that
6620   // we're actually narrowing here.
6621   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6622     return false;
6623 
6624   // Verify that we are actually reducing a load width here.
6625   if (LdStMemVT.bitsLT(MemVT))
6626     return false;
6627 
6628   // Ensure that this isn't going to produce an unsupported memory access.
6629   if (ShAmt) {
6630     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6631     const unsigned ByteShAmt = ShAmt / 8;
6632     const Align LDSTAlign = LDST->getAlign();
6633     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6634     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6635                                 LDST->getAddressSpace(), NarrowAlign,
6636                                 LDST->getMemOperand()->getFlags()))
6637       return false;
6638   }
6639 
6640   // It's not possible to generate a constant of extended or untyped type.
6641   EVT PtrType = LDST->getBasePtr().getValueType();
6642   if (PtrType == MVT::Untyped || PtrType.isExtended())
6643     return false;
6644 
6645   if (isa<LoadSDNode>(LDST)) {
6646     LoadSDNode *Load = cast<LoadSDNode>(LDST);
6647     // Don't transform one with multiple uses; this would require adding a
6648     // new load.
6649     if (!SDValue(Load, 0).hasOneUse())
6650       return false;
6651 
6652     if (LegalOperations &&
6653         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6654       return false;
6655 
6656     // For the transform to be legal, the load must produce only two values
6657     // (the value loaded and the chain).  Don't transform a pre-increment
6658     // load, for example, which produces an extra value.  Otherwise the
6659     // transformation is not equivalent, and the downstream logic to replace
6660     // uses gets things wrong.
6661     if (Load->getNumValues() > 2)
6662       return false;
6663 
6664     // If the load that we're shrinking is an extload and we're not just
6665     // discarding the extension, we can't simply shrink the load. Bail.
6666     // TODO: It would be possible to merge the extensions in some cases.
6667     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6668         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6669       return false;
6670 
6671     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6672       return false;
6673   } else {
6674     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6675     StoreSDNode *Store = cast<StoreSDNode>(LDST);
6676     // Can't write outside the original store
6677     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6678       return false;
6679 
6680     if (LegalOperations &&
6681         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6682       return false;
6683   }
6684   return true;
6685 }
6686 
6687 bool DAGCombiner::SearchForAndLoads(SDNode *N,
6688                                     SmallVectorImpl<LoadSDNode*> &Loads,
6689                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6690                                     ConstantSDNode *Mask,
6691                                     SDNode *&NodeToMask) {
6692   // Recursively search the operands, looking for loads which can be
6693   // narrowed.
6694   for (SDValue Op : N->op_values()) {
6695     if (Op.getValueType().isVector())
6696       return false;
6697 
6698     // Some constants may need fixing up later if they are too large.
6699     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6700       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6701           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6702         NodesWithConsts.insert(N);
6703       continue;
6704     }
6705 
6706     if (!Op.hasOneUse())
6707       return false;
6708 
6709     switch (Op.getOpcode()) {
6710     case ISD::LOAD: {
6711       auto *Load = cast<LoadSDNode>(Op);
6712       EVT ExtVT;
6713       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6714           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6715 
6716         // ZEXTLOAD is already small enough.
6717         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6718             ExtVT.bitsGE(Load->getMemoryVT()))
6719           continue;
6720 
6721         // Use bitsLE so that equal-sized loads are also converted to zext.
6722         if (ExtVT.bitsLE(Load->getMemoryVT()))
6723           Loads.push_back(Load);
6724 
6725         continue;
6726       }
6727       return false;
6728     }
6729     case ISD::ZERO_EXTEND:
6730     case ISD::AssertZext: {
6731       unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6732       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6733       EVT VT = Op.getOpcode() == ISD::AssertZext ?
6734         cast<VTSDNode>(Op.getOperand(1))->getVT() :
6735         Op.getOperand(0).getValueType();
6736 
6737       // We can accept extending nodes if the mask is wider than or equal
6738       // in width to the original type.
6739       if (ExtVT.bitsGE(VT))
6740         continue;
6741       break;
6742     }
6743     case ISD::OR:
6744     case ISD::XOR:
6745     case ISD::AND:
6746       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6747                              NodeToMask))
6748         return false;
6749       continue;
6750     }
6751 
6752     // Allow one node which will be masked along with any loads found.
6753     if (NodeToMask)
6754       return false;
6755 
6756     // Also ensure that the node to be masked only produces one data result.
6757     NodeToMask = Op.getNode();
6758     if (NodeToMask->getNumValues() > 1) {
6759       bool HasValue = false;
6760       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6761         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6762         if (VT != MVT::Glue && VT != MVT::Other) {
6763           if (HasValue) {
6764             NodeToMask = nullptr;
6765             return false;
6766           }
6767           HasValue = true;
6768         }
6769       }
6770       assert(HasValue && "Node to be masked has no data result?");
6771     }
6772   }
6773   return true;
6774 }
6775 
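     // For example, (and (or (load i32 p), (load i32 q)), 0xFF) can become
     // (or (zextload i8 p), (zextload i8 q)): the mask is propagated to the
     // loads, each load is narrowed, and the AND itself becomes redundant.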
6776 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6777   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6778   if (!Mask)
6779     return false;
6780 
6781   if (!Mask->getAPIntValue().isMask())
6782     return false;
6783 
6784   // No need to do anything if the and directly uses a load.
6785   if (isa<LoadSDNode>(N->getOperand(0)))
6786     return false;
6787 
6788   SmallVector<LoadSDNode*, 8> Loads;
6789   SmallPtrSet<SDNode*, 2> NodesWithConsts;
6790   SDNode *FixupNode = nullptr;
6791   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6792     if (Loads.empty())
6793       return false;
6794 
6795     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6796     SDValue MaskOp = N->getOperand(1);
6797 
6798     // If it exists, fix up the single node we allow in the tree that needs
6799     // masking.
6800     if (FixupNode) {
6801       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6802       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6803                                 FixupNode->getValueType(0),
6804                                 SDValue(FixupNode, 0), MaskOp);
6805       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6806       if (And.getOpcode() == ISD::AND)
6807         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6808     }
6809 
6810     // Narrow any constants that need it.
6811     for (auto *LogicN : NodesWithConsts) {
6812       SDValue Op0 = LogicN->getOperand(0);
6813       SDValue Op1 = LogicN->getOperand(1);
6814 
6815       if (isa<ConstantSDNode>(Op0))
6816         Op0 =
6817             DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6818 
6819       if (isa<ConstantSDNode>(Op1))
6820         Op1 =
6821             DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6822 
6823       if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6824         std::swap(Op0, Op1);
6825 
6826       DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6827     }
6828 
6829     // Create narrow loads.
6830     for (auto *Load : Loads) {
6831       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6832       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6833                                 SDValue(Load, 0), MaskOp);
6834       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6835       if (And.getOpcode() == ISD::AND)
6836         And = SDValue(
6837             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6838       SDValue NewLoad = reduceLoadWidth(And.getNode());
6839       assert(NewLoad &&
6840              "Shouldn't be masking the load if it can't be narrowed");
6841       CombineTo(Load, NewLoad, NewLoad.getValue(1));
6842     }
6843     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6844     return true;
6845   }
6846   return false;
6847 }
6848 
6849 // Unfold
6850 //    x &  (-1 'logical shift' y)
6851 // To
6852 //    (x 'opposite logical shift' y) 'logical shift' y
6853 // if it is better for performance.
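     // Concretely:
     //    x & (-1 << y)  -->  (x >> y) << y
     //    x & (-1 >> y)  -->  (x << y) >> y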
6854 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6855   assert(N->getOpcode() == ISD::AND);
6856 
6857   SDValue N0 = N->getOperand(0);
6858   SDValue N1 = N->getOperand(1);
6859 
6860   // Do we actually prefer shifts over a mask?
6861   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6862     return SDValue();
6863 
6864   // Try to match  (-1 '[outer] logical shift' y)
6865   unsigned OuterShift;
6866   unsigned InnerShift; // The opposite direction to the OuterShift.
6867   SDValue Y;           // Shift amount.
6868   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6869     if (!M.hasOneUse())
6870       return false;
6871     OuterShift = M->getOpcode();
6872     if (OuterShift == ISD::SHL)
6873       InnerShift = ISD::SRL;
6874     else if (OuterShift == ISD::SRL)
6875       InnerShift = ISD::SHL;
6876     else
6877       return false;
6878     if (!isAllOnesConstant(M->getOperand(0)))
6879       return false;
6880     Y = M->getOperand(1);
6881     return true;
6882   };
6883 
6884   SDValue X;
6885   if (matchMask(N1))
6886     X = N0;
6887   else if (matchMask(N0))
6888     X = N1;
6889   else
6890     return SDValue();
6891 
6892   SDLoc DL(N);
6893   EVT VT = N->getValueType(0);
6894 
6895   //     tmp = x   'opposite logical shift' y
6896   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6897   //     ret = tmp 'logical shift' y
6898   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6899 
6900   return T1;
6901 }
6902 
6903 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6904 /// For a target with a bit test, this is expected to become test + set and save
6905 /// at least 1 instruction.
6906 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6907   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6908 
6909   // Look through an optional extension.
6910   SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6911   if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6912     And0 = And0.getOperand(0);
6913   if (!isOneConstant(And1) || !And0.hasOneUse())
6914     return SDValue();
6915 
6916   SDValue Src = And0;
6917 
6918   // Attempt to find a 'not' op.
6919   // TODO: Should we favor test+set even without the 'not' op?
6920   bool FoundNot = false;
6921   if (isBitwiseNot(Src)) {
6922     FoundNot = true;
6923     Src = Src.getOperand(0);
6924 
6925     // Look through an optional truncation. The source operand may not be the
6926     // same type as the original 'and', but that is ok because we are masking
6927     // off everything but the low bit.
6928     if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6929       Src = Src.getOperand(0);
6930   }
6931 
6932   // Match a shift-right by constant.
6933   if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6934     return SDValue();
6935 
6936   // This is probably not worthwhile without a supported type.
6937   EVT SrcVT = Src.getValueType();
6938   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6939   if (!TLI.isTypeLegal(SrcVT))
6940     return SDValue();
6941 
6942   // We might have looked through casts that make this transform invalid.
6943   unsigned BitWidth = SrcVT.getScalarSizeInBits();
6944   SDValue ShiftAmt = Src.getOperand(1);
6945   auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6946   if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6947     return SDValue();
6948 
6949   // Set source to shift source.
6950   Src = Src.getOperand(0);
6951 
6952   // Try again to find a 'not' op.
6953   // TODO: Should we favor test+set even with two 'not' ops?
6954   if (!FoundNot) {
6955     if (!isBitwiseNot(Src))
6956       return SDValue();
6957     Src = Src.getOperand(0);
6958   }
6959 
6960   if (!TLI.hasBitTest(Src, ShiftAmt))
6961     return SDValue();
6962 
6963   // Turn this into a bit-test pattern using mask op + setcc:
6964   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6965   // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
6966   SDLoc DL(And);
6967   SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6968   EVT CCVT =
6969       TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6970   SDValue Mask = DAG.getConstant(
6971       APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6972   SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6973   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6974   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6975   return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6976 }
6977 
6978 /// For targets that support usubsat, match a bit-hack form of that operation
6979 /// that ends in 'and' and convert it.
6980 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6981   EVT VT = N->getValueType(0);
6982   unsigned BitWidth = VT.getScalarSizeInBits();
6983   APInt SignMask = APInt::getSignMask(BitWidth);
6984 
6985   // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6986   // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6987   // xor/add with SMIN (signmask) are logically equivalent.
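   // Worked i8 example: for X = 200 (0xC8), (X ^ 128) & (X s>> 7) is
   // 0x48 & 0xFF = 72, matching usubsat(200, 128); for X = 100 the sra
   // yields 0, matching usubsat(100, 128) = 0.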
6988   SDValue X;
6989   if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6990                          m_OneUse(m_Sra(m_Deferred(X),
6991                                         m_SpecificInt(BitWidth - 1))))) &&
6992       !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6993                          m_OneUse(m_Sra(m_Deferred(X),
6994                                         m_SpecificInt(BitWidth - 1))))))
6995     return SDValue();
6996 
6997   return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6998                      DAG.getConstant(SignMask, DL, VT));
6999 }
7000 
7001 /// Given a bitwise logic operation N with a matching bitwise logic operand,
7002 /// fold a pattern where 2 of the source operands are identically shifted
7003 /// values. For example:
7004 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7005 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7006                                  SelectionDAG &DAG) {
7007   unsigned LogicOpcode = N->getOpcode();
7008   assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7009          "Expected bitwise logic operation");
7010 
7011   if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7012     return SDValue();
7013 
7014   // Match another bitwise logic op and a shift.
7015   unsigned ShiftOpcode = ShiftOp.getOpcode();
7016   if (LogicOp.getOpcode() != LogicOpcode ||
7017       !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7018         ShiftOpcode == ISD::SRA))
7019     return SDValue();
7020 
7021   // Match another shift op inside the first logic operand. Handle both commuted
7022   // possibilities.
7023   // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7024   // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7025   SDValue X1 = ShiftOp.getOperand(0);
7026   SDValue Y = ShiftOp.getOperand(1);
7027   SDValue X0, Z;
7028   if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7029       LogicOp.getOperand(0).getOperand(1) == Y) {
7030     X0 = LogicOp.getOperand(0).getOperand(0);
7031     Z = LogicOp.getOperand(1);
7032   } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7033              LogicOp.getOperand(1).getOperand(1) == Y) {
7034     X0 = LogicOp.getOperand(1).getOperand(0);
7035     Z = LogicOp.getOperand(0);
7036   } else {
7037     return SDValue();
7038   }
7039 
7040   EVT VT = N->getValueType(0);
7041   SDLoc DL(N);
7042   SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7043   SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7044   return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7045 }
7046 
7047 /// Given a tree of logic operations with shape like
7048 /// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7049 /// try to match and fold shift operations with the same shift amount.
7050 /// For example:
7051 /// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7052 /// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7053 ///   LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7054                                      SDValue RightHand, SelectionDAG &DAG) {
7055   unsigned LogicOpcode = N->getOpcode();
7056   assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7057          "Expected bitwise logic operation");
7058   if (LeftHand.getOpcode() != LogicOpcode ||
7059       RightHand.getOpcode() != LogicOpcode)
7060     return SDValue();
7061   if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7062     return SDValue();
7063 
7064   // Try to match one of following patterns:
7065   // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7066   // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7067   // Note that foldLogicOfShifts will handle commuted versions of the
7068   // left-hand side itself.
7069   SDValue CombinedShifts, W;
7070   SDValue R0 = RightHand.getOperand(0);
7071   SDValue R1 = RightHand.getOperand(1);
7072   if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7073     W = R1;
7074   else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7075     W = R0;
7076   else
7077     return SDValue();
7078 
7079   EVT VT = N->getValueType(0);
7080   SDLoc DL(N);
7081   return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7082 }
7083 
7084 SDValue DAGCombiner::visitAND(SDNode *N) {
7085   SDValue N0 = N->getOperand(0);
7086   SDValue N1 = N->getOperand(1);
7087   EVT VT = N1.getValueType();
7088   SDLoc DL(N);
7089 
7090   // x & x --> x
7091   if (N0 == N1)
7092     return N0;
7093 
7094   // fold (and c1, c2) -> c1&c2
7095   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7096     return C;
7097 
7098   // canonicalize constant to RHS
7099   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7100       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7101     return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7102 
7103   if (areBitwiseNotOfEachother(N0, N1))
7104     return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7105 
7106   // fold vector ops
7107   if (VT.isVector()) {
7108     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7109       return FoldedVOp;
7110 
7111     // fold (and x, 0) -> 0, vector edition
7112     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7113       // Do not return N1, because an undef node may exist in N1.
7114       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7115                              N1.getValueType());
7116 
7117     // fold (and x, -1) -> x, vector edition
7118     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7119       return N0;
7120 
7121     // fold (and (masked_load x), (splat_vec c)) -> (zext_masked_load x)
7122     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7123     ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7124     if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7125       EVT LoadVT = MLoad->getMemoryVT();
7126       EVT ExtVT = VT;
7127       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7128         // For this AND to be a zero extension of the masked load, the
7129         // elements of the splat constant must mask the bottom bits of the
7130         // extended element type.
7131         uint64_t ElementSize =
7132             LoadVT.getVectorElementType().getScalarSizeInBits();
7133         if (Splat->getAPIntValue().isMask(ElementSize)) {
7134           SDValue NewLoad = DAG.getMaskedLoad(
7135               ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7136               MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7137               LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7138               ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7139           bool LoadHasOtherUsers = !N0.hasOneUse();
7140           CombineTo(N, NewLoad);
7141           if (LoadHasOtherUsers)
7142             CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7143           return SDValue(N, 0);
7144         }
7145       }
7146     }
7147   }
7148 
7149   // fold (and x, -1) -> x
7150   if (isAllOnesConstant(N1))
7151     return N0;
7152 
7153   // if (and x, c) is known to be zero, return 0
7154   unsigned BitWidth = VT.getScalarSizeInBits();
7155   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7156   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7157     return DAG.getConstant(0, DL, VT);
7158 
7159   if (SDValue R = foldAndOrOfSETCC(N, DAG))
7160     return R;
7161 
7162   if (SDValue NewSel = foldBinOpIntoSelect(N))
7163     return NewSel;
7164 
7165   // reassociate and
7166   if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7167     return RAND;
7168 
7169   // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7170   if (SDValue SD =
7171           reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7172     return SD;
7173 
7174   // fold (and (or x, C), D) -> D if (C & D) == D
7175   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7176     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7177   };
7178   if (N0.getOpcode() == ISD::OR &&
7179       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7180     return N1;
7181 
7182   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7183     SDValue N0Op0 = N0.getOperand(0);
7184     EVT SrcVT = N0Op0.getValueType();
7185     unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7186     APInt Mask = ~N1C->getAPIntValue();
7187     Mask = Mask.trunc(SrcBitWidth);
7188 
7189     // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7190     if (DAG.MaskedValueIsZero(N0Op0, Mask))
7191       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7192 
7193     // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7194     if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7195         TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7196         TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7197         TLI.isNarrowingProfitable(N, VT, SrcVT))
7198       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7199                          DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7200                                      DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7201   }
7202 
7203   // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7204   if (ISD::isExtOpcode(N0.getOpcode())) {
7205     unsigned ExtOpc = N0.getOpcode();
7206     SDValue N0Op0 = N0.getOperand(0);
7207     if (N0Op0.getOpcode() == ISD::AND &&
7208         (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7209         N0->hasOneUse() && N0Op0->hasOneUse()) {
7210       if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7211                                                       {N0Op0.getOperand(1)})) {
7212         if (SDValue NewMask =
7213                 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7214           return DAG.getNode(ISD::AND, DL, VT,
7215                              DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7216                              NewMask);
7217         }
7218       }
7219     }
7220   }
7221 
7222   // Similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7223   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7224   // already be zero by virtue of the width of the base type of the load.
7225   //
7226   // The 'X' node here can either be nothing or an extract_vector_elt to catch
7227   // more cases.
7228   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7229        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
7230        N0.getOperand(0).getOpcode() == ISD::LOAD &&
7231        N0.getOperand(0).getResNo() == 0) ||
7232       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7233     auto *Load =
7234         cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7235 
7236     // Get the constant (if applicable) the zero'th operand is being ANDed with.
7237     // This can be a pure constant or a vector splat, in which case we treat the
7238     // vector as a scalar and use the splat value.
7239     APInt Constant = APInt::getZero(1);
7240     if (const ConstantSDNode *C = isConstOrConstSplat(
7241             N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7242       Constant = C->getAPIntValue();
7243     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7244       unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7245       APInt SplatValue, SplatUndef;
7246       unsigned SplatBitSize;
7247       bool HasAnyUndefs;
7248       // Endianness should not matter here. Code below makes sure that we only
7249       // use the result if the SplatBitSize is a multiple of the vector element
7250       // size. And after that we AND all element sized parts of the splat
7251       // together. So the end result should be the same regardless of in which
7252       // order we do those operations.
7253       const bool IsBigEndian = false;
7254       bool IsSplat =
7255           Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7256                                   HasAnyUndefs, EltBitWidth, IsBigEndian);
7257 
7258       // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7259       // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7260       if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7261         // Undef bits can contribute to a possible optimisation if set, so
7262         // set them.
7263         SplatValue |= SplatUndef;
7264 
7265         // The splat value may be something like "0x00FFFFFF", which means 0 for
7266         // the first vector value and FF for the rest, repeating. We need a mask
7267         // that will apply equally to all members of the vector, so AND all the
7268         // lanes of the constant together.
7269         Constant = APInt::getAllOnes(EltBitWidth);
7270         for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7271           Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7272       }
7273     }
7274 
7275     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7276     // actually legal and isn't going to get expanded; otherwise this is a false
7277     // optimisation.
7278     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7279                                                     Load->getValueType(0),
7280                                                     Load->getMemoryVT());
7281 
7282     // Resize the constant to the same size as the original memory access before
7283     // extension. If it is still the AllOnesValue then this AND is completely
7284     // unneeded.
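     // E.g. for (and (extload i8 -> i32, p), 0xFF), the constant truncated
     // to the i8 memory width is all-ones, so the AND is redundant once the
     // extload is turned into a zextload (assuming that zextload is legal).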
7285     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7286 
7287     bool B;
7288     switch (Load->getExtensionType()) {
7289     default: B = false; break;
7290     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7291     case ISD::ZEXTLOAD:
7292     case ISD::NON_EXTLOAD: B = true; break;
7293     }
7294 
7295     if (B && Constant.isAllOnes()) {
7296       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7297       // preserve semantics once we get rid of the AND.
7298       SDValue NewLoad(Load, 0);
7299 
7300       // Fold the AND away. NewLoad may get replaced immediately.
7301       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7302 
7303       if (Load->getExtensionType() == ISD::EXTLOAD) {
7304         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7305                               Load->getValueType(0), SDLoc(Load),
7306                               Load->getChain(), Load->getBasePtr(),
7307                               Load->getOffset(), Load->getMemoryVT(),
7308                               Load->getMemOperand());
7309         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7310         if (Load->getNumValues() == 3) {
7311           // PRE/POST_INC loads have 3 values.
7312           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7313                            NewLoad.getValue(2) };
7314           CombineTo(Load, To, 3, true);
7315         } else {
7316           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7317         }
7318       }
7319 
7320       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7321     }
7322   }
7323 
7324   // Try to convert a constant mask AND into a shuffle clear mask.
7325   if (VT.isVector())
7326     if (SDValue Shuffle = XformToShuffleWithZero(N))
7327       return Shuffle;
7328 
7329   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7330     return Combined;
7331 
7332   if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7333       ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7334     SDValue Ext = N0.getOperand(0);
7335     EVT ExtVT = Ext->getValueType(0);
7336     SDValue Extendee = Ext->getOperand(0);
7337 
7338     unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7339     if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7340         (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7341       //    (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7342       // => (extract_subvector (iN_zeroext v))
7343       SDValue ZeroExtExtendee =
7344           DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7345 
7346       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7347                          N0.getOperand(1));
7348     }
7349   }
7350 
7351   // fold (and (masked_gather x)) -> (zext_masked_gather x)
7352   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7353     EVT MemVT = GN0->getMemoryVT();
7354     EVT ScalarVT = MemVT.getScalarType();
7355 
7356     if (SDValue(GN0, 0).hasOneUse() &&
7357         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7358         TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7359       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
7360                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
7361 
7362       SDValue ZExtLoad = DAG.getMaskedGather(
7363           DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7364           GN0->getIndexType(), ISD::ZEXTLOAD);
7365 
7366       CombineTo(N, ZExtLoad);
7367       AddToWorklist(ZExtLoad.getNode());
7368       // Avoid recheck of N.
7369       return SDValue(N, 0);
7370     }
7371   }
7372 
7373   // fold (and (load x), 255) -> (zextload x, i8)
7374   // fold (and (extload x, i16), 255) -> (zextload x, i8)
7375   if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7376     if (SDValue Res = reduceLoadWidth(N))
7377       return Res;
7378 
7379   if (LegalTypes) {
7380     // Attempt to propagate the AND back up to the leaves which, if they're
7381     // loads, can be combined to narrow loads and the AND node can be removed.
7382     // Perform after legalization so that extend nodes will already be
7383     // combined into the loads.
7384     if (BackwardsPropagateMask(N))
7385       return SDValue(N, 0);
7386   }
7387 
7388   if (SDValue Combined = visitANDLike(N0, N1, N))
7389     return Combined;
7390 
7391   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
7392   if (N0.getOpcode() == N1.getOpcode())
7393     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7394       return V;
7395 
7396   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7397     return R;
7398   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7399     return R;
7400 
7401   // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7402   // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7403   SDValue X, Y, Z, NotY;
7404   for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7405     if (sd_match(N,
7406                  m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7407         sd_match(NotY, m_Not(m_Value(Y))) &&
7408         (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7409       return DAG.getNode(ISD::AND, DL, VT, X,
7410                          DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7411 
7412   // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7413   for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7414     if (sd_match(N, m_And(m_Value(X),
7415                           m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7416         sd_match(NotY, m_Not(m_Value(Y))) &&
7417         (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7418       return DAG.getNode(ISD::AND, DL, VT, X,
7419                          DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7420 
7421   // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7422   // If we are shifting down an extended sign bit, see if we can simplify
7423   // this to shifting the MSB directly to expose further simplifications.
7424   // This pattern often appears after sext_inreg legalization.
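   // For example, if X = (sext i8 Y to i32), X has 25 sign bits, so
   // (and (srl X, 8), 1) --> (srl X, 31): bits 7..31 of X are all copies
   // of the sign bit.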
7425   APInt Amt;
7426   if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7427       Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7428     return DAG.getNode(ISD::SRL, DL, VT, X,
7429                        DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7430 
7431   // Masking the negated extension of a boolean is just the zero-extended
7432   // boolean:
7433   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7434   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7435   //
7436   // Note: the SimplifyDemandedBits fold below can make an information-losing
7437   // transform, and then we have no way to find this better fold.
7438   if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7439     if (X.getOpcode() == ISD::ZERO_EXTEND &&
7440         X.getOperand(0).getScalarValueSizeInBits() == 1)
7441       return X;
7442     if (X.getOpcode() == ISD::SIGN_EXTEND &&
7443         X.getOperand(0).getScalarValueSizeInBits() == 1)
7444       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7445   }
7446 
7447   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7448   // fold (and (sra)) -> (and (srl)) when possible.
7449   if (SimplifyDemandedBits(SDValue(N, 0)))
7450     return SDValue(N, 0);
7451 
7452   // fold (zext_inreg (extload x)) -> (zextload x)
7453   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7454   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7455       (ISD::isEXTLoad(N0.getNode()) ||
7456        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7457     auto *LN0 = cast<LoadSDNode>(N0);
7458     EVT MemVT = LN0->getMemoryVT();
7459     // If we zero all the possible extended bits, then we can turn this into
7460     // a zextload if we are running before legalize or the operation is legal.
7461     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7462     unsigned MemBitSize = MemVT.getScalarSizeInBits();
7463     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7464     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7465         ((!LegalOperations && LN0->isSimple()) ||
7466          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7467       SDValue ExtLoad =
7468           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7469                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7470       AddToWorklist(N);
7471       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7472       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7473     }
7474   }
7475 
7476   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7477   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7478     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7479                                            N0.getOperand(1), false))
7480       return BSwap;
7481   }
7482 
7483   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7484     return Shifts;
7485 
7486   if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7487     return V;
7488 
7489   // Recognize the following pattern:
7490   //
7491   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7492   //
7493   // where bitmask is a mask that clears the upper bits of AndVT. The
7494   // number of bits in bitmask must be a power of two.
7495   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7496     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7497       return false;
7498 
7499     auto *C = dyn_cast<ConstantSDNode>(RHS);
7500     if (!C)
7501       return false;
7502 
7503     if (!C->getAPIntValue().isMask(
7504             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7505       return false;
7506 
7507     return true;
7508   };
7509 
7510   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7511   if (IsAndZeroExtMask(N0, N1))
7512     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7513 
7514   if (hasOperation(ISD::USUBSAT, VT))
7515     if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7516       return V;
7517 
7518   // Postpone until legalization completed to avoid interference with bswap
7519   // folding
7520   if (LegalOperations || VT.isVector())
7521     if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7522       return R;
7523 
7524   return SDValue();
7525 }
7526 
7527 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
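     /// For types wider than i16 the swapped halfword lands in the low 16
     /// bits, e.g. for i32:
     ///   ((a & 0xFF) << 8) | ((a >> 8) & 0xFF) == (bswap a) >> 16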
7528 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7529                                         bool DemandHighBits) {
7530   if (!LegalOperations)
7531     return SDValue();
7532 
7533   EVT VT = N->getValueType(0);
7534   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7535     return SDValue();
7536   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7537     return SDValue();
7538 
7539   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7540   bool LookPassAnd0 = false;
7541   bool LookPassAnd1 = false;
7542   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7543     std::swap(N0, N1);
7544   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7545     std::swap(N0, N1);
7546   if (N0.getOpcode() == ISD::AND) {
7547     if (!N0->hasOneUse())
7548       return SDValue();
7549     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7550     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7551     // This is needed for X86.
7552     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7553                   N01C->getZExtValue() != 0xFFFF))
7554       return SDValue();
7555     N0 = N0.getOperand(0);
7556     LookPassAnd0 = true;
7557   }
7558 
7559   if (N1.getOpcode() == ISD::AND) {
7560     if (!N1->hasOneUse())
7561       return SDValue();
7562     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7563     if (!N11C || N11C->getZExtValue() != 0xFF)
7564       return SDValue();
7565     N1 = N1.getOperand(0);
7566     LookPassAnd1 = true;
7567   }
7568 
7569   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7570     std::swap(N0, N1);
7571   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7572     return SDValue();
7573   if (!N0->hasOneUse() || !N1->hasOneUse())
7574     return SDValue();
7575 
7576   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7577   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7578   if (!N01C || !N11C)
7579     return SDValue();
7580   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7581     return SDValue();
7582 
7583   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7584   SDValue N00 = N0->getOperand(0);
7585   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7586     if (!N00->hasOneUse())
7587       return SDValue();
7588     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7589     if (!N001C || N001C->getZExtValue() != 0xFF)
7590       return SDValue();
7591     N00 = N00.getOperand(0);
7592     LookPassAnd0 = true;
7593   }
7594 
7595   SDValue N10 = N1->getOperand(0);
7596   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7597     if (!N10->hasOneUse())
7598       return SDValue();
7599     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7600     // Also allow 0xFFFF since the bits will be shifted out. This is needed
7601     // for X86.
7602     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7603                    N101C->getZExtValue() != 0xFFFF))
7604       return SDValue();
7605     N10 = N10.getOperand(0);
7606     LookPassAnd1 = true;
7607   }
7608 
7609   if (N00 != N10)
7610     return SDValue();
7611 
7612   // Make sure everything beyond the low halfword gets set to zero since the
7613   // SRL by 16 will clear the top bits.
7614   unsigned OpSizeInBits = VT.getSizeInBits();
7615   if (OpSizeInBits > 16) {
7616     // If the left-shift isn't masked out then the only way this is a bswap is
7617     // if all bits beyond the low 8 are 0. In that case the entire pattern
7618     // reduces to a left shift anyway: leave it for other parts of the combiner.
7619     if (DemandHighBits && !LookPassAnd0)
7620       return SDValue();
7621 
7622     // However, if the right shift isn't masked out then it might be because
7623     // it's not needed. See if we can spot that too. If the high bits aren't
7624     // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7625     // upper bits to be zero.
7626     if (!LookPassAnd1) {
7627       unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7628       if (!DAG.MaskedValueIsZero(N10,
7629                                  APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7630         return SDValue();
7631     }
7632   }
7633 
7634   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7635   if (OpSizeInBits > 16) {
7636     SDLoc DL(N);
7637     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7638                       DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7639   }
7640   return Res;
7641 }
7642 
7643 /// Return true if the specified node is an element that makes up a 32-bit
7644 /// packed halfword byteswap.
7645 /// ((x & 0x000000ff) << 8) |
7646 /// ((x & 0x0000ff00) >> 8) |
7647 /// ((x & 0x00ff0000) << 8) |
7648 /// ((x & 0xff000000) >> 8)
7649 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7650   if (!N->hasOneUse())
7651     return false;
7652 
7653   unsigned Opc = N.getOpcode();
7654   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7655     return false;
7656 
7657   SDValue N0 = N.getOperand(0);
7658   unsigned Opc0 = N0.getOpcode();
7659   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7660     return false;
7661 
7662   ConstantSDNode *N1C = nullptr;
7663   // SHL or SRL: look upstream for AND mask operand
7664   if (Opc == ISD::AND)
7665     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7666   else if (Opc0 == ISD::AND)
7667     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7668   if (!N1C)
7669     return false;
7670 
7671   unsigned MaskByteOffset;
7672   switch (N1C->getZExtValue()) {
7673   default:
7674     return false;
7675   case 0xFF:       MaskByteOffset = 0; break;
7676   case 0xFF00:     MaskByteOffset = 1; break;
7677   case 0xFFFF:
7678     // In case demanded bits didn't clear the bits that will be shifted out.
7679     // This is needed for X86.
7680     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7681       MaskByteOffset = 1;
7682       break;
7683     }
7684     return false;
7685   case 0xFF0000:   MaskByteOffset = 2; break;
7686   case 0xFF000000: MaskByteOffset = 3; break;
7687   }
7688 
7689   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7690   if (Opc == ISD::AND) {
7691     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7692       // (x >> 8) & 0xff
7693       // (x >> 8) & 0xff0000
7694       if (Opc0 != ISD::SRL)
7695         return false;
7696       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7697       if (!C || C->getZExtValue() != 8)
7698         return false;
7699     } else {
7700       // (x << 8) & 0xff00
7701       // (x << 8) & 0xff000000
7702       if (Opc0 != ISD::SHL)
7703         return false;
7704       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7705       if (!C || C->getZExtValue() != 8)
7706         return false;
7707     }
7708   } else if (Opc == ISD::SHL) {
7709     // (x & 0xff) << 8
7710     // (x & 0xff0000) << 8
7711     if (MaskByteOffset != 0 && MaskByteOffset != 2)
7712       return false;
7713     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7714     if (!C || C->getZExtValue() != 8)
7715       return false;
7716   } else { // Opc == ISD::SRL
7717     // (x & 0xff00) >> 8
7718     // (x & 0xff000000) >> 8
7719     if (MaskByteOffset != 1 && MaskByteOffset != 3)
7720       return false;
7721     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7722     if (!C || C->getZExtValue() != 8)
7723       return false;
7724   }
7725 
7726   if (Parts[MaskByteOffset])
7727     return false;
7728 
7729   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7730   return true;
7731 }
7732 
7733 // Match 2 elements of a packed halfword bswap.
7734 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7735   if (N.getOpcode() == ISD::OR)
7736     return isBSwapHWordElement(N.getOperand(0), Parts) &&
7737            isBSwapHWordElement(N.getOperand(1), Parts);
7738 
7739   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7740     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7741     if (!C || C->getAPIntValue() != 16)
7742       return false;
7743     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7744     return true;
7745   }
7746 
7747   return false;
7748 }
7749 
7750 // Match this pattern:
7751 //   (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
7752 // And rewrite this to:
7753 //   (rotr (bswap A), 16)
7754 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7755                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
7756                                        SDValue N1, EVT VT) {
7757   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7758          "MatchBSwapHWordOrAndAnd: expecting i32");
7759   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7760     return SDValue();
7761   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7762     return SDValue();
7763   // TODO: this is too restrictive; lifting this restriction requires more tests
7764   if (!N0->hasOneUse() || !N1->hasOneUse())
7765     return SDValue();
7766   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7767   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7768   if (!Mask0 || !Mask1)
7769     return SDValue();
7770   if (Mask0->getAPIntValue() != 0xff00ff00 ||
7771       Mask1->getAPIntValue() != 0x00ff00ff)
7772     return SDValue();
7773   SDValue Shift0 = N0.getOperand(0);
7774   SDValue Shift1 = N1.getOperand(0);
7775   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7776     return SDValue();
7777   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7778   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7779   if (!ShiftAmt0 || !ShiftAmt1)
7780     return SDValue();
7781   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7782     return SDValue();
7783   if (Shift0.getOperand(0) != Shift1.getOperand(0))
7784     return SDValue();
7785 
7786   SDLoc DL(N);
7787   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7788   SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7789   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7790 }
7791 
7792 /// Match a 32-bit packed halfword bswap. That is
7793 /// ((x & 0x000000ff) << 8) |
7794 /// ((x & 0x0000ff00) >> 8) |
7795 /// ((x & 0x00ff0000) << 8) |
7796 /// ((x & 0xff000000) >> 8)
7797 /// => (rotl (bswap x), 16)
7798 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7799   if (!LegalOperations)
7800     return SDValue();
7801 
7802   EVT VT = N->getValueType(0);
7803   if (VT != MVT::i32)
7804     return SDValue();
7805   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7806     return SDValue();
7807 
7808   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
7809     return BSwap;
7810 
7811   // Try again with commuted operands.
7812   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
7813     return BSwap;
7814 
7815 
7816   // Look for either
7817   // (or (bswaphpair), (bswaphpair))
7818   // (or (or (bswaphpair), (and)), (and))
7819   // (or (or (and), (bswaphpair)), (and))
7820   SDNode *Parts[4] = {};
7821 
7822   if (isBSwapHWordPair(N0, Parts)) {
7823     // (or (or (and), (and)), (or (and), (and)))
7824     if (!isBSwapHWordPair(N1, Parts))
7825       return SDValue();
7826   } else if (N0.getOpcode() == ISD::OR) {
7827     // (or (or (or (and), (and)), (and)), (and))
7828     if (!isBSwapHWordElement(N1, Parts))
7829       return SDValue();
7830     SDValue N00 = N0.getOperand(0);
7831     SDValue N01 = N0.getOperand(1);
7832     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7833         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7834       return SDValue();
7835   } else {
7836     return SDValue();
7837   }
7838 
7839   // Make sure the parts are all coming from the same node.
7840   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7841     return SDValue();
7842 
7843   SDLoc DL(N);
7844   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7845                               SDValue(Parts[0], 0));
7846 
7847   // The result of the bswap should be rotated by 16. If that's not legal,
7848   // then do (x << 16) | (x >> 16).
7849   SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7850   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7851     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7852   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7853     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7854   return DAG.getNode(ISD::OR, DL, VT,
7855                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7856                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7857 }
7858 
7859 /// This contains all DAGCombine rules which reduce two values combined by
7860 /// an Or operation to a single value; \see visitANDLike().
7861 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7862   EVT VT = N1.getValueType();
7863 
7864   // fold (or x, undef) -> -1
7865   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7866     return DAG.getAllOnesConstant(DL, VT);
7867 
7868   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7869     return V;
7870 
7871   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
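   // For instance, on i16, (or (and X, 0xFF00), (and Y, 0x00FF)) becomes
   // (and (or X, Y), 0xFFFF) when X's low byte and Y's high byte are known
   // to be zero.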
7872   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7873       // Don't increase # computations.
7874       (N0->hasOneUse() || N1->hasOneUse())) {
7875     // We can only do this xform if we know that bits from X that are set in C2
7876     // but not in C1 are already zero.  Likewise for Y.
7877     if (const ConstantSDNode *N0O1C =
7878         getAsNonOpaqueConstant(N0.getOperand(1))) {
7879       if (const ConstantSDNode *N1O1C =
7880           getAsNonOpaqueConstant(N1.getOperand(1))) {
7883         const APInt &LHSMask = N0O1C->getAPIntValue();
7884         const APInt &RHSMask = N1O1C->getAPIntValue();
7885 
7886         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7887             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7888           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7889                                   N0.getOperand(0), N1.getOperand(0));
7890           return DAG.getNode(ISD::AND, DL, VT, X,
7891                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
7892         }
7893       }
7894     }
7895   }
7896 
7897   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7898   if (N0.getOpcode() == ISD::AND &&
7899       N1.getOpcode() == ISD::AND &&
7900       N0.getOperand(0) == N1.getOperand(0) &&
7901       // Don't increase # computations.
7902       (N0->hasOneUse() || N1->hasOneUse())) {
7903     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7904                             N0.getOperand(1), N1.getOperand(1));
7905     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7906   }
7907 
7908   return SDValue();
7909 }
7910 
7911 /// OR combines for which the commuted variant will be tried as well.
7912 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7913                                   SDNode *N) {
7914   EVT VT = N0.getValueType();
7915   unsigned BW = VT.getScalarSizeInBits();
7916   SDLoc DL(N);
7917 
7918   auto peekThroughResize = [](SDValue V) {
7919     if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7920       return V->getOperand(0);
7921     return V;
7922   };
7923 
7924   SDValue N0Resized = peekThroughResize(N0);
7925   if (N0Resized.getOpcode() == ISD::AND) {
7926     SDValue N1Resized = peekThroughResize(N1);
7927     SDValue N00 = N0Resized.getOperand(0);
7928     SDValue N01 = N0Resized.getOperand(1);
7929 
7930     // fold or (and x, y), x --> x
7931     if (N00 == N1Resized || N01 == N1Resized)
7932       return N1;
7933 
7934     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7935     // TODO: Set AllowUndefs = true.
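         // This holds because the AND can only clear bits that Y then sets:
         // (X & ~Y) | Y == (X | Y) & (~Y | Y) == X | Y.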
7936     if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7937                                                   /* AllowUndefs */ false)) {
7938       if (peekThroughResize(NotOperand) == N1Resized)
7939         return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7940                            N1);
7941     }
7942 
7943     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7944     if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7945                                                   /* AllowUndefs */ false)) {
7946       if (peekThroughResize(NotOperand) == N1Resized)
7947         return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7948                            N1);
7949     }
7950   }
7951 
7952   SDValue X, Y;
7953 
7954   // fold or (xor X, N1), N1 --> or X, N1
7955   if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7956     return DAG.getNode(ISD::OR, DL, VT, X, N1);
7957 
7958   // fold or (xor x, y), (x and/or y) --> or x, y
7959   if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7960       (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7961        sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7962     return DAG.getNode(ISD::OR, DL, VT, X, Y);
7963 
7964   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7965     return R;
7966 
7967   auto peekThroughZext = [](SDValue V) {
7968     if (V->getOpcode() == ISD::ZERO_EXTEND)
7969       return V->getOperand(0);
7970     return V;
7971   };
7972 
7973   // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
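       // The SHL term is redundant: for shift amounts in [1, BW) we have
       // fshl(X, B, Y) == (shl X, Y) | (srl B, BW - Y), and for Y == 0 both
       // fshl(X, B, 0) and (shl X, 0) are just X, so OR-ing in (shl X, Y)
       // adds no new bits.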
7974   if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7975       N0.getOperand(0) == N1.getOperand(0) &&
7976       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7977     return N0;
7978 
7979   // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7980   if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7981       N0.getOperand(1) == N1.getOperand(0) &&
7982       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7983     return N0;
7984 
7985   // Attempt to match a legalized build_pair-esque pattern:
7986   // or(shl(aext(Hi),BW/2),zext(Lo))
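       // e.g. for i64 built from two i32 halves, this is
       // (or (shl (aext Hi), 32), (zext Lo)). If both halves are bitwise-not
       // values, the NOT can be hoisted above the pair, since each half of
       // not(build_pair(Lo, Hi)) is just the inverted half:
       // build_pair(not(Lo), not(Hi)) == not(build_pair(Lo, Hi)).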
7987   SDValue Lo, Hi;
7988   if (sd_match(N0,
7989                m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7990       sd_match(N1, m_ZExt(m_Value(Lo))) &&
7991       Lo.getScalarValueSizeInBits() == (BW / 2) &&
7992       Lo.getValueType() == Hi.getValueType()) {
7993     // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7994     SDValue NotLo, NotHi;
7995     if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7996         sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7997       Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7998       Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7999       Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8000                        DAG.getShiftAmountConstant(BW / 2, VT, DL));
8001       return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8002     }
8003   }
8004 
8005   return SDValue();
8006 }
8007 
8008 SDValue DAGCombiner::visitOR(SDNode *N) {
8009   SDValue N0 = N->getOperand(0);
8010   SDValue N1 = N->getOperand(1);
8011   EVT VT = N1.getValueType();
8012   SDLoc DL(N);
8013 
8014   // x | x --> x
8015   if (N0 == N1)
8016     return N0;
8017 
8018   // fold (or c1, c2) -> c1|c2
8019   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8020     return C;
8021 
8022   // canonicalize constant to RHS
8023   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8024       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8025     return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8026 
8027   // fold vector ops
8028   if (VT.isVector()) {
8029     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8030       return FoldedVOp;
8031 
8032     // fold (or x, 0) -> x, vector edition
8033     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8034       return N0;
8035 
8036     // fold (or x, -1) -> -1, vector edition
8037     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8038       // Do not return N1, because an undef element may exist in N1.
8039       return DAG.getAllOnesConstant(DL, N1.getValueType());
8040 
8041     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8042     // Do this only if the resulting type / shuffle is legal.
8043     auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8044     auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8045     if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8046       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8047       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8048       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8049       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8050       // Ensure both shuffles have a zero input.
8051       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8052         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8053         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8054         bool CanFold = true;
8055         int NumElts = VT.getVectorNumElements();
8056         SmallVector<int, 4> Mask(NumElts, -1);
8057 
8058         for (int i = 0; i != NumElts; ++i) {
8059           int M0 = SV0->getMaskElt(i);
8060           int M1 = SV1->getMaskElt(i);
8061 
8062           // Determine if either index is pointing to a zero vector.
8063           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8064           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8065 
8066           // If one element is zero and the other side is undef, keep undef.
8067           // This also handles the case that both are undef.
8068           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8069             continue;
8070 
8071           // Make sure only one of the elements is zero.
8072           if (M0Zero == M1Zero) {
8073             CanFold = false;
8074             break;
8075           }
8076 
8077           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8078 
8079           // We have a zero and non-zero element. If the non-zero came from
8080           // SV0 make the index a LHS index. If it came from SV1, make it
8081           // a RHS index. We need to mod by NumElts because we don't care
8082           // which operand it came from in the original shuffles.
8083           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8084         }
8085 
8086         if (CanFold) {
8087           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8088           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8089           SDValue LegalShuffle =
8090               TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8091           if (LegalShuffle)
8092             return LegalShuffle;
8093         }
8094       }
8095     }
8096   }
8097 
8098   // fold (or x, 0) -> x
8099   if (isNullConstant(N1))
8100     return N0;
8101 
8102   // fold (or x, -1) -> -1
8103   if (isAllOnesConstant(N1))
8104     return N1;
8105 
8106   if (SDValue NewSel = foldBinOpIntoSelect(N))
8107     return NewSel;
8108 
8109   // fold (or x, c) -> c iff (x & ~c) == 0
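       // e.g. if x is known to be (and y, 0x0F), then (or x, 0xFF) -> 0xFF,
       // because every bit x could set is already set in the constant.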
8110   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8111   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8112     return N1;
8113 
8114   if (SDValue R = foldAndOrOfSETCC(N, DAG))
8115     return R;
8116 
8117   if (SDValue Combined = visitORLike(N0, N1, DL))
8118     return Combined;
8119 
8120   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8121     return Combined;
8122 
8123   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8124   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8125     return BSwap;
8126   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8127     return BSwap;
8128 
8129   // reassociate or
8130   if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8131     return ROR;
8132 
8133   // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8134   if (SDValue SD =
8135           reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8136     return SD;
8137 
8138   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8139   // iff (c1 & c2) != 0 or c1/c2 are undef.
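       // The transform itself relies on the unconditional bitwise identity
       //   (X & C1) | C2 == (X | C2) & (C1 | C2);
       // the intersection test below only gates when the canonicalization fires.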
8140   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8141     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8142   };
8143   if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8144       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8145     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8146                                                  {N1, N0.getOperand(1)})) {
8147       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8148       AddToWorklist(IOR.getNode());
8149       return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8150     }
8151   }
8152 
8153   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8154     return Combined;
8155   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8156     return Combined;
8157 
8158   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
8159   if (N0.getOpcode() == N1.getOpcode())
8160     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8161       return V;
8162 
8163   // See if this is some rotate idiom.
8164   if (SDValue Rot = MatchRotate(N0, N1, DL))
8165     return Rot;
8166 
8167   if (SDValue Load = MatchLoadCombine(N))
8168     return Load;
8169 
8170   // Simplify the operands using demanded-bits information.
8171   if (SimplifyDemandedBits(SDValue(N, 0)))
8172     return SDValue(N, 0);
8173 
8174   // If OR can be rewritten into ADD, try combines based on ADD.
8175   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8176       DAG.isADDLike(SDValue(N, 0)))
8177     if (SDValue Combined = visitADDLike(N))
8178       return Combined;
8179 
8180   // Postpone until legalization has completed, to avoid interfering with
8181   // bswap folding.
8182   if (LegalOperations || VT.isVector())
8183     if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8184       return R;
8185 
8186   return SDValue();
8187 }
8188 
8189 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8190                                  SDValue &Mask) {
8191   if (Op.getOpcode() == ISD::AND &&
8192       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8193     Mask = Op.getOperand(1);
8194     return Op.getOperand(0);
8195   }
8196   return Op;
8197 }
8198 
8199 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
8200 static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8201                             SDValue &Mask) {
8202   Op = stripConstantMask(DAG, Op, Mask);
8203   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8204     Shift = Op;
8205     return true;
8206   }
8207   return false;
8208 }
8209 
8210 /// Helper function for visitOR to extract the needed side of a rotate idiom
8211 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
8212 /// InstCombine merged some outside op with one of the shifts from
8213 /// the rotate pattern.
8214 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8215 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
8216 /// patterns:
8217 ///
8218 ///   (or (add v v) (srl v bitwidth-1)):
8219 ///     expands (add v v) -> (shl v 1)
8220 ///
8221 ///   (or (mul v c0) (srl (mul v c1) c2)):
8222 ///     expands (mul v c0) -> (shl (mul v c1) c3)
8223 ///
8224 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
8225 ///     expands (udiv v c0) -> (srl (udiv v c1) c3)
8226 ///
8227 ///   (or (shl v c0) (srl (shl v c1) c2)):
8228 ///     expands (shl v c0) -> (shl (shl v c1) c3)
8229 ///
8230 ///   (or (srl v c0) (shl (srl v c1) c2)):
8231 ///     expands (srl v c0) -> (srl (srl v c1) c3)
8232 ///
8233 /// Such that in all cases, c3+c2==bitwidth(op v c1).
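     ///
     /// Illustrative i32 example (assumed constants): with c1 == 3, c2 == 30
     /// and c3 == 2 (so c3+c2 == 32), (or (mul v 12) (srl (mul v 3) 30)) is
     /// handled by expanding (mul v 12) -> (shl (mul v 3) 2), which the caller
     /// can then fold into (rotl (mul v 3) 2).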
8234 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8235                                      SDValue ExtractFrom, SDValue &Mask,
8236                                      const SDLoc &DL) {
8237   assert(OppShift && ExtractFrom && "Empty SDValue");
8238   if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8239     return SDValue();
8240 
8241   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8242 
8243   // Value and Type of the shift.
8244   SDValue OppShiftLHS = OppShift.getOperand(0);
8245   EVT ShiftedVT = OppShiftLHS.getValueType();
8246 
8247   // Amount of the existing shift.
8248   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8249 
8250   // (add v v) -> (shl v 1)
8251   // TODO: Should this be a general DAG canonicalization?
8252   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8253       ExtractFrom.getOpcode() == ISD::ADD &&
8254       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8255       ExtractFrom.getOperand(0) == OppShiftLHS &&
8256       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8257     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8258                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
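       // e.g. for i32, (or (add v v), (srl v 31)) becomes
       // (or (shl v 1), (srl v 31)), which the caller can fold to (rotl v 1).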
8259 
8260   // Preconditions:
8261   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8262   //
8263   // Find opcode of the needed shift to be extracted from (op0 v c0).
8264   unsigned Opcode = ISD::DELETED_NODE;
8265   bool IsMulOrDiv = false;
8266   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8267   // opcode or its arithmetic (mul or udiv) variant.
8268   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8269     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8270     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8271       return false;
8272     Opcode = NeededShift;
8273     return true;
8274   };
8275   // op0 must be either the needed shift opcode or the mul/udiv equivalent
8276   // that the needed shift can be extracted from.
8277   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8278       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8279     return SDValue();
8280 
8281   // op0 must be the same opcode on both sides, have the same LHS argument,
8282   // and produce the same value type.
8283   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8284       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8285       ShiftedVT != ExtractFrom.getValueType())
8286     return SDValue();
8287 
8288   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8289   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8290   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8291   ConstantSDNode *ExtractFromCst =
8292       isConstOrConstSplat(ExtractFrom.getOperand(1));
8293   // TODO: We should be able to handle non-uniform constant vectors for these values
8294   // Check that we have constant values.
8295   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8296       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8297       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8298     return SDValue();
8299 
8300   // Compute the shift amount we need to extract to complete the rotate.
8301   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8302   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8303     return SDValue();
8304   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8305   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8306   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8307   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8308   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8309 
8310   // Now try extract the needed shift from the ExtractFrom op and see if the
8311   // result matches up with the existing shift's LHS op.
8312   if (IsMulOrDiv) {
8313     // Op to extract from is a mul or udiv by a constant.
8314     // Check:
8315     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8316     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8317     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8318                                                  NeededShiftAmt.getZExtValue());
8319     APInt ResultAmt;
8320     APInt Rem;
8321     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8322     if (Rem != 0 || ResultAmt != OppLHSAmt)
8323       return SDValue();
8324   } else {
8325     // Op to extract from is a shift by a constant.
8326     // Check:
8327     //      c2 - (bitwidth(op0 v c0) - c1) == c0
8328     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8329                                           ExtractFromAmt.getBitWidth()))
8330       return SDValue();
8331   }
8332 
8333   // Return the expanded shift op that should allow a rotate to be formed.
8334   EVT ShiftVT = OppShift.getOperand(1).getValueType();
8335   EVT ResVT = ExtractFrom.getValueType();
8336   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8337   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8338 }
8339 
8340 // Return true if we can prove that, whenever Neg and Pos are both in the
8341 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
8342 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8343 //
8344 //     (or (shift1 X, Neg), (shift2 X, Pos))
8345 //
8346 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8347 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
8348 // to consider shift amounts with defined behavior.
8349 //
8350 // The IsRotate flag should be set when the LHS of both shifts is the same.
8351 // Otherwise if matching a general funnel shift, it should be clear.
8352 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8353                            SelectionDAG &DAG, bool IsRotate) {
8354   const auto &TLI = DAG.getTargetLoweringInfo();
8355   // If EltSize is a power of 2 then:
8356   //
8357   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8358   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8359   //
8360   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8361   // for the stronger condition:
8362   //
8363   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
8364   //
8365   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8366   // we can just replace Neg with Neg' for the rest of the function.
8367   //
8368   // In other cases we check for the even stronger condition:
8369   //
8370   //     Neg == EltSize - Pos                                    [B]
8371   //
8372   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
8373   // behavior if Pos == 0 (and consequently Neg == EltSize).
8374   //
8375   // We could actually use [A] whenever EltSize is a power of 2, but the
8376   // only extra cases that it would match are those uninteresting ones
8377   // where Neg and Pos are never in range at the same time.  E.g. for
8378   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8379   // as well as (sub 32, Pos), but:
8380   //
8381   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8382   //
8383   // always invokes undefined behavior for 32-bit X.
8384   //
8385   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8386   // This allows us to peek through any operations that only affect Mask's
8387   // un-demanded bits.
8388   //
8389   // NOTE: We can only do this when matching operations which won't modify the
8390   // least Log2(EltSize) significant bits and not a general funnel shift.
8391   unsigned MaskLoBits = 0;
8392   if (IsRotate && isPowerOf2_64(EltSize)) {
8393     unsigned Bits = Log2_64(EltSize);
8394     unsigned NegBits = Neg.getScalarValueSizeInBits();
8395     if (NegBits >= Bits) {
8396       APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8397       if (SDValue Inner =
8398               TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8399         Neg = Inner;
8400         MaskLoBits = Bits;
8401       }
8402     }
8403   }
8404 
8405   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8406   if (Neg.getOpcode() != ISD::SUB)
8407     return false;
8408   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8409   if (!NegC)
8410     return false;
8411   SDValue NegOp1 = Neg.getOperand(1);
8412 
8413   // On the RHS of [A], if Pos is the result of an operation on Pos' that won't
8414   // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8415   // are redundant for the purpose of the equality.
8416   if (MaskLoBits) {
8417     unsigned PosBits = Pos.getScalarValueSizeInBits();
8418     if (PosBits >= MaskLoBits) {
8419       APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8420       if (SDValue Inner =
8421               TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8422         Pos = Inner;
8423       }
8424     }
8425   }
8426 
8427   // The condition we need is now:
8428   //
8429   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8430   //
8431   // If NegOp1 == Pos then we need:
8432   //
8433   //              EltSize & Mask == NegC & Mask
8434   //
8435   // (because "x & Mask" is a truncation and distributes through subtraction).
8436   //
8437   // We also need to account for a potential truncation of NegOp1 if the amount
8438   // has already been legalized to a shift amount type.
8439   APInt Width;
8440   if ((Pos == NegOp1) ||
8441       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8442     Width = NegC->getAPIntValue();
8443 
8444   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8445   // Then the condition we want to prove becomes:
8446   //
8447   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8448   //
8449   // which, again because "x & Mask" is a truncation, becomes:
8450   //
8451   //                NegC & Mask == (EltSize - PosC) & Mask
8452   //             EltSize & Mask == (NegC + PosC) & Mask
8453   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8454     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8455       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8456     else
8457       return false;
8458   } else
8459     return false;
8460 
8461   // Now we just need to check that EltSize & Mask == Width & Mask.
8462   if (MaskLoBits)
8463     // EltSize & Mask is 0 since Mask is EltSize - 1.
8464     return Width.getLoBits(MaskLoBits) == 0;
8465   return Width == EltSize;
8466 }
8467 
8468 // A subroutine of MatchRotate used once we have found an OR of two opposite
8469 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
8470 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8471 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
8472 // Neg with outer conversions stripped away.
8473 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8474                                        SDValue Neg, SDValue InnerPos,
8475                                        SDValue InnerNeg, bool HasPos,
8476                                        unsigned PosOpcode, unsigned NegOpcode,
8477                                        const SDLoc &DL) {
8478   // fold (or (shl x, (*ext y)),
8479   //          (srl x, (*ext (sub 32, y)))) ->
8480   //   (rotl x, y) or (rotr x, (sub 32, y))
8481   //
8482   // fold (or (shl x, (*ext (sub 32, y))),
8483   //          (srl x, (*ext y))) ->
8484   //   (rotr x, y) or (rotl x, (sub 32, y))
8485   EVT VT = Shifted.getValueType();
8486   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8487                      /*IsRotate*/ true)) {
8488     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8489                        HasPos ? Pos : Neg);
8490   }
8491 
8492   return SDValue();
8493 }
8494 
8495 // A subroutine of MatchRotate used once we have found an OR of two opposite
8496 // shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
8497 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8498 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
8499 // Neg with outer conversions stripped away.
8500 // TODO: Merge with MatchRotatePosNeg.
8501 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8502                                        SDValue Neg, SDValue InnerPos,
8503                                        SDValue InnerNeg, bool HasPos,
8504                                        unsigned PosOpcode, unsigned NegOpcode,
8505                                        const SDLoc &DL) {
8506   EVT VT = N0.getValueType();
8507   unsigned EltBits = VT.getScalarSizeInBits();
8508 
8509   // fold (or (shl x0, (*ext y)),
8510   //          (srl x1, (*ext (sub 32, y)))) ->
8511   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8512   //
8513   // fold (or (shl x0, (*ext (sub 32, y))),
8514   //          (srl x1, (*ext y))) ->
8515   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8516   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8517     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8518                        HasPos ? Pos : Neg);
8519   }
8520 
8521   // Matching the shift+xor cases, we can't easily use the xor'd shift
8522   // amount, so for now just use the PosOpcode case if it's legal.
8523   // TODO: When can we use the NegOpcode case?
8524   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8525     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8526       if (Op.getOpcode() != BinOpc)
8527         return false;
8528       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8529       return Cst && (Cst->getAPIntValue() == Imm);
8530     };
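         // These matches rely on (xor y, BW-1) == (BW-1) - y for y in
         // [0, BW-1] when BW is a power of 2; the extra pre-shift by 1 makes
         // the total shift equal BW - y without ever shifting by the full
         // bit width.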
8531 
8532     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8533     //   -> (fshl x0, x1, y)
8534     if (IsBinOpImm(N1, ISD::SRL, 1) &&
8535         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8536         InnerPos == InnerNeg.getOperand(0) &&
8537         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8538       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8539     }
8540 
8541     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8542     //   -> (fshr x0, x1, y)
8543     if (IsBinOpImm(N0, ISD::SHL, 1) &&
8544         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8545         InnerNeg == InnerPos.getOperand(0) &&
8546         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8547       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8548     }
8549 
8550     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8551     //   -> (fshr x0, x1, y)
8552     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8553     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8554         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8555         InnerNeg == InnerPos.getOperand(0) &&
8556         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8557       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8558     }
8559   }
8560 
8561   return SDValue();
8562 }
8563 
8564 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
8565 // idioms for rotate, and if the target supports rotation instructions, generate
8566 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8567 // with different shifted sources.
8568 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8569   EVT VT = LHS.getValueType();
8570 
8571   // The target must have at least one rotate/funnel flavor.
8572   // We still try to match rotate by constant pre-legalization.
8573   // TODO: Support pre-legalization funnel-shift by constant.
8574   bool HasROTL = hasOperation(ISD::ROTL, VT);
8575   bool HasROTR = hasOperation(ISD::ROTR, VT);
8576   bool HasFSHL = hasOperation(ISD::FSHL, VT);
8577   bool HasFSHR = hasOperation(ISD::FSHR, VT);
8578 
8579   // If the type is going to be promoted and the target has enabled custom
8580   // lowering for rotate, allow matching rotate by non-constants. Only allow
8581   // this for scalar types.
8582   if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8583                                   TargetLowering::TypePromoteInteger) {
8584     HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8585     HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8586   }
8587 
8588   if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8589     return SDValue();
8590 
8591   // Check for truncated rotate.
8592   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8593       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8594     assert(LHS.getValueType() == RHS.getValueType());
8595     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8596       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8597     }
8598   }
8599 
8600   // Match "(X shl/srl V1) & V2" where V2 may not be present.
8601   SDValue LHSShift;   // The shift.
8602   SDValue LHSMask;    // AND value if any.
8603   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8604 
8605   SDValue RHSShift;   // The shift.
8606   SDValue RHSMask;    // AND value if any.
8607   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8608 
8609   // If neither side matched a rotate half, bail
8610   if (!LHSShift && !RHSShift)
8611     return SDValue();
8612 
8613   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8614   // side of the rotate, so try to handle that here. In all cases we need to
8615   // pass the matched shift from the opposite side to compute the opcode and
8616   // needed shift amount to extract.  We still want to do this if both sides
8617   // matched a rotate half because one half may be a potential overshift that
8618   // can be broken down (i.e., if InstCombine merged two shl or srl ops into a
8619   // single one).
8620 
8621   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8622   if (LHSShift)
8623     if (SDValue NewRHSShift =
8624             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8625       RHSShift = NewRHSShift;
8626   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8627   if (RHSShift)
8628     if (SDValue NewLHSShift =
8629             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8630       LHSShift = NewLHSShift;
8631 
8632   // If a side is still missing, nothing else we can do.
8633   if (!RHSShift || !LHSShift)
8634     return SDValue();
8635 
8636   // At this point we've matched or extracted a shift op on each side.
8637 
8638   if (LHSShift.getOpcode() == RHSShift.getOpcode())
8639     return SDValue(); // Shifts must disagree.
8640 
8641   // Canonicalize shl to left side in a shl/srl pair.
8642   if (RHSShift.getOpcode() == ISD::SHL) {
8643     std::swap(LHS, RHS);
8644     std::swap(LHSShift, RHSShift);
8645     std::swap(LHSMask, RHSMask);
8646   }
8647 
8648   // Something has gone wrong - we've lost the shl/srl pair - bail.
8649   if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8650     return SDValue();
8651 
8652   unsigned EltSizeInBits = VT.getScalarSizeInBits();
8653   SDValue LHSShiftArg = LHSShift.getOperand(0);
8654   SDValue LHSShiftAmt = LHSShift.getOperand(1);
8655   SDValue RHSShiftArg = RHSShift.getOperand(0);
8656   SDValue RHSShiftAmt = RHSShift.getOperand(1);
8657 
8658   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8659                                         ConstantSDNode *RHS) {
8660     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8661   };
8662 
8663   auto ApplyMasks = [&](SDValue Res) {
8664     // If there is an AND of either shifted operand, apply it to the result.
8665     if (LHSMask.getNode() || RHSMask.getNode()) {
8666       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8667       SDValue Mask = AllOnes;
8668 
8669       if (LHSMask.getNode()) {
8670         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8671         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8672                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8673       }
8674       if (RHSMask.getNode()) {
8675         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8676         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8677                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8678       }
8679 
8680       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8681     }
8682 
8683     return Res;
8684   };
8685 
8686   // TODO: Support pre-legalization funnel-shift by constant.
8687   bool IsRotate = LHSShiftArg == RHSShiftArg;
8688   if (!IsRotate && !(HasFSHL || HasFSHR)) {
8689     if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8690         ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8691       // Look for a disguised rotate by constant.
8692       // The common shifted operand X may be hidden inside another 'or'.
8693       SDValue X, Y;
8694       auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8695         if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8696           return false;
8697         if (CommonOp == Or.getOperand(0)) {
8698           X = CommonOp;
8699           Y = Or.getOperand(1);
8700           return true;
8701         }
8702         if (CommonOp == Or.getOperand(1)) {
8703           X = CommonOp;
8704           Y = Or.getOperand(0);
8705           return true;
8706         }
8707         return false;
8708       };
8709 
8710       SDValue Res;
8711       if (matchOr(LHSShiftArg, RHSShiftArg)) {
8712         // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8713         SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8714         SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8715         Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8716       } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8717         // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8718         SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8719         SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8720         Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8721       } else {
8722         return SDValue();
8723       }
8724 
8725       return ApplyMasks(Res);
8726     }
8727 
8728     return SDValue(); // Requires funnel shift support.
8729   }
8730 
8731   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8732   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8733   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8734   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8735   // iff C1+C2 == EltSizeInBits
8736   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8737     SDValue Res;
8738     if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8739       bool UseROTL = !LegalOperations || HasROTL;
8740       Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8741                         UseROTL ? LHSShiftAmt : RHSShiftAmt);
8742     } else {
8743       bool UseFSHL = !LegalOperations || HasFSHL;
8744       Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8745                         RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8746     }
8747 
8748     return ApplyMasks(Res);
8749   }
8750 
8751   // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8752   // amount.
8753   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8754     return SDValue();
8755 
8756   // If there is a mask here, and we have a variable shift, we can't be sure
8757   // that we're masking out the correct bits.
8758   if (LHSMask.getNode() || RHSMask.getNode())
8759     return SDValue();
8760 
8761   // If the shift amount is sign/zext/any-extended just peel it off.
8762   SDValue LExtOp0 = LHSShiftAmt;
8763   SDValue RExtOp0 = RHSShiftAmt;
8764   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8765        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8766        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8767        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8768       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8769        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8770        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8771        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8772     LExtOp0 = LHSShiftAmt.getOperand(0);
8773     RExtOp0 = RHSShiftAmt.getOperand(0);
8774   }
8775 
8776   if (IsRotate && (HasROTL || HasROTR)) {
8777     SDValue TryL =
8778         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8779                           RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8780     if (TryL)
8781       return TryL;
8782 
8783     SDValue TryR =
8784         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8785                           LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8786     if (TryR)
8787       return TryR;
8788   }
8789 
8790   SDValue TryL =
8791       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8792                         LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8793   if (TryL)
8794     return TryL;
8795 
8796   SDValue TryR =
8797       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8798                         RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8799   if (TryR)
8800     return TryR;
8801 
8802   return SDValue();
8803 }
8804 
8805 /// Recursively traverses the expression calculating the origin of the requested
8806 /// byte of the given value. Returns std::nullopt if the provider can't be
8807 /// calculated.
8808 ///
8809 /// For all the values except the root of the expression, we verify that the
8810 /// value has exactly one use and if not then return std::nullopt. This way if
8811 /// the origin of the byte is returned it's guaranteed that the values which
8812 /// contribute to the byte are not used outside of this expression.
8813 ///
8814 /// However, there is a special case when dealing with vector loads -- we allow
8815 /// more than one use if the load is a vector type.  Since the values that
8816 /// contribute to the byte ultimately come from the ExtractVectorElements of the
8817 /// Load, we don't care if the Load has uses other than ExtractVectorElements,
8818 /// because those operations are independent from the pattern to be combined.
8819 /// For vector loads, we simply care that the ByteProviders are adjacent
8820 /// positions of the same vector, and their index matches the byte that is being
8821 /// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8822 /// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8823 /// byte position we are trying to provide for the LoadCombine. If these do
8824 /// not match, then we cannot combine the vector loads. \p Index uses the
8825 /// byte position we are trying to provide for and is matched against the
8826 /// shl and load size. The \p Index algorithm ensures the requested byte is
8827 /// provided for by the pattern, and the pattern does not over-provide bytes.
8828 ///
8829 ///
8830 /// The supported LoadCombine pattern for vector loads is as follows
8831 ///                              or
8832 ///                          /        \
8833 ///                         or        shl
8834 ///                       /     \      |
8835 ///                     or      shl   zext
8836 ///                   /    \     |     |
8837 ///                 shl   zext  zext  EVE*
8838 ///                  |     |     |     |
8839 ///                 zext  EVE*  EVE*  LOAD
8840 ///                  |     |     |
8841 ///                 EVE*  LOAD  LOAD
8842 ///                  |
8843 ///                 LOAD
8844 ///
8845 /// *ExtractVectorElement
8846 using SDByteProvider = ByteProvider<SDNode *>;
8847 
8848 static std::optional<SDByteProvider>
8849 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8850                       std::optional<uint64_t> VectorIndex,
8851                       unsigned StartingIndex = 0) {
8852 
8853   // A typical i64-from-i8 pattern requires recursion up to a depth of 8 calls.
8854   if (Depth == 10)
8855     return std::nullopt;
8856 
8857   // Only allow multiple uses if the instruction is a vector load (in which
8858   // case we will use the load for every ExtractVectorElement)
8859   if (Depth && !Op.hasOneUse() &&
8860       (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8861     return std::nullopt;
8862 
8863   // Fail to combine if we have encountered anything but a LOAD after handling
8864   // an ExtractVectorElement.
8865   if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8866     return std::nullopt;
8867 
8868   unsigned BitWidth = Op.getValueSizeInBits();
8869   if (BitWidth % 8 != 0)
8870     return std::nullopt;
8871   unsigned ByteWidth = BitWidth / 8;
8872   assert(Index < ByteWidth && "invalid index requested");
8873   (void) ByteWidth;
8874 
8875   switch (Op.getOpcode()) {
8876   case ISD::OR: {
8877     auto LHS =
8878         calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8879     if (!LHS)
8880       return std::nullopt;
8881     auto RHS =
8882         calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8883     if (!RHS)
8884       return std::nullopt;
8885 
8886     if (LHS->isConstantZero())
8887       return RHS;
8888     if (RHS->isConstantZero())
8889       return LHS;
8890     return std::nullopt;
8891   }
8892   case ISD::SHL: {
8893     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8894     if (!ShiftOp)
8895       return std::nullopt;
8896 
8897     uint64_t BitShift = ShiftOp->getZExtValue();
8898 
8899     if (BitShift % 8 != 0)
8900       return std::nullopt;
8901     uint64_t ByteShift = BitShift / 8;
8902 
8903     // If we are shifting by an amount greater than the index we are trying to
8904     // provide, then do not provide anything. Otherwise, subtract the byte
8905     // shift amount from the index.
8906     return Index < ByteShift
8907                ? SDByteProvider::getConstantZero()
8908                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8909                                        Depth + 1, VectorIndex, Index);
8910   }
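       // e.g. for (shl x, 16), bytes 0 and 1 of the result are constant zero
       // and a request for byte 2 is forwarded to byte 0 of x.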
8911   case ISD::ANY_EXTEND:
8912   case ISD::SIGN_EXTEND:
8913   case ISD::ZERO_EXTEND: {
8914     SDValue NarrowOp = Op->getOperand(0);
8915     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8916     if (NarrowBitWidth % 8 != 0)
8917       return std::nullopt;
8918     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8919 
8920     if (Index >= NarrowByteWidth)
8921       return Op.getOpcode() == ISD::ZERO_EXTEND
8922                  ? std::optional<SDByteProvider>(
8923                        SDByteProvider::getConstantZero())
8924                  : std::nullopt;
8925     return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8926                                  StartingIndex);
8927   }
8928   case ISD::BSWAP:
8929     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8930                                  Depth + 1, VectorIndex, StartingIndex);
8931   case ISD::EXTRACT_VECTOR_ELT: {
8932     auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8933     if (!OffsetOp)
8934       return std::nullopt;
8935 
8936     VectorIndex = OffsetOp->getZExtValue();
8937 
8938     SDValue NarrowOp = Op->getOperand(0);
8939     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8940     if (NarrowBitWidth % 8 != 0)
8941       return std::nullopt;
8942     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8943     // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8944     // type, leaving the high bits undefined.
8945     if (Index >= NarrowByteWidth)
8946       return std::nullopt;
8947 
8948     // Check to see if the position of the element in the vector corresponds
8949     // with the byte we are trying to provide for. In the case of a vector of
8950     // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8951     // the element will provide a range of bytes. For example, if we have a
8952     // vector of i16s, each element provides two bytes (V[1] provides bytes 2
8953     // and 3).
8954     if (*VectorIndex * NarrowByteWidth > StartingIndex)
8955       return std::nullopt;
8956     if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8957       return std::nullopt;
8958 
8959     return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8960                                  VectorIndex, StartingIndex);
8961   }
8962   case ISD::LOAD: {
8963     auto L = cast<LoadSDNode>(Op.getNode());
8964     if (!L->isSimple() || L->isIndexed())
8965       return std::nullopt;
8966 
8967     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8968     if (NarrowBitWidth % 8 != 0)
8969       return std::nullopt;
8970     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8971 
8972     // If the width of the load does not reach the byte we are trying to
8973     // provide for, and it is not a ZEXTLOAD, then the load does not provide
8974     // the byte in question.
8975     if (Index >= NarrowByteWidth)
8976       return L->getExtensionType() == ISD::ZEXTLOAD
8977                  ? std::optional<SDByteProvider>(
8978                        SDByteProvider::getConstantZero())
8979                  : std::nullopt;
8980 
8981     unsigned BPVectorIndex = VectorIndex.value_or(0U);
8982     return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8983   }
8984   }
8985 
8986   return std::nullopt;
8987 }
8988 
8989 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8990   return i;
8991 }
8992 
8993 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8994   return BW - i - 1;
8995 }
8996 
8997 // Check if the byte offsets we are looking at match either a big- or
8998 // little-endian loaded value. Return true for big endian, false for little
8999 // endian, and std::nullopt if the match failed.
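     // e.g. relative byte offsets {0,1,2,3} match little endian and {3,2,1,0}
     // match big endian; mixed orders such as {1,0,3,2} fail and return
     // std::nullopt.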
9000 static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9001                                        int64_t FirstOffset) {
9002   // Endianness can only be determined from at least 2 bytes.
9003   unsigned Width = ByteOffsets.size();
9004   if (Width < 2)
9005     return std::nullopt;
9006 
9007   bool BigEndian = true, LittleEndian = true;
9008   for (unsigned i = 0; i < Width; i++) {
9009     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9010     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9011     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9012     if (!BigEndian && !LittleEndian)
9013       return std::nullopt;
9014   }
9015 
9016   assert((BigEndian != LittleEndian) && "It should be either big endian or "
9017                                         "little endian");
9018   return BigEndian;
9019 }
9020 
9021 // Look through one layer of truncate or extend.
9022 static SDValue stripTruncAndExt(SDValue Value) {
9023   switch (Value.getOpcode()) {
9024   case ISD::TRUNCATE:
9025   case ISD::ZERO_EXTEND:
9026   case ISD::SIGN_EXTEND:
9027   case ISD::ANY_EXTEND:
9028     return Value.getOperand(0);
9029   }
9030   return SDValue();
9031 }
9032 
9033 /// Match a pattern where a wide type scalar value is stored by several narrow
9034 /// stores. Fold it into a single store or a BSWAP and a store if the target
9035 /// supports it.
9036 ///
9037 /// Assuming little endian target:
9038 ///  i8 *p = ...
9039 ///  i32 val = ...
9040 ///  p[0] = (val >> 0) & 0xFF;
9041 ///  p[1] = (val >> 8) & 0xFF;
9042 ///  p[2] = (val >> 16) & 0xFF;
9043 ///  p[3] = (val >> 24) & 0xFF;
9044 /// =>
9045 ///  *((i32)p) = val;
9046 ///
9047 ///  i8 *p = ...
9048 ///  i32 val = ...
9049 ///  p[0] = (val >> 24) & 0xFF;
9050 ///  p[1] = (val >> 16) & 0xFF;
9051 ///  p[2] = (val >> 8) & 0xFF;
9052 ///  p[3] = (val >> 0) & 0xFF;
9053 /// =>
9054 ///  *((i32)p) = BSWAP(val);
9055 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9056   // The matching looks for "store (trunc x)" patterns that appear early but are
9057   // likely to be replaced by truncating store nodes during combining.
9058   // TODO: If there is evidence that running this later would help, this
9059   //       limitation could be removed. Legality checks may need to be added
9060   //       for the created store and optional bswap/rotate.
9061   if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9062     return SDValue();
9063 
9064   // We only handle merging simple stores of 1-4 bytes.
9065   // TODO: Allow unordered atomics when wider type is legal (see D66309)
9066   EVT MemVT = N->getMemoryVT();
9067   if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9068       !N->isSimple() || N->isIndexed())
9069     return SDValue();
9070 
9071   // Collect all of the stores in the chain, up to the maximum store width (i64).
9072   SDValue Chain = N->getChain();
9073   SmallVector<StoreSDNode *, 8> Stores = {N};
9074   unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9075   unsigned MaxWideNumBits = 64;
9076   unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9077   while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9078     // All stores must be the same size to ensure that we are writing all of the
9079     // bytes in the wide value.
9080     // This store should have exactly one use as a chain operand for another
9081     // store in the merging set. If there are other chain uses, then the
9082     // transform may not be safe because order of loads/stores outside of this
9083     // set may not be preserved.
9084     // TODO: We could allow multiple sizes by tracking each stored byte.
9085     if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9086         Store->isIndexed() || !Store->hasOneUse())
9087       return SDValue();
9088     Stores.push_back(Store);
9089     Chain = Store->getChain();
9090     if (MaxStores < Stores.size())
9091       return SDValue();
9092   }
9093   // There is no reason to continue if we do not have at least a pair of stores.
9094   if (Stores.size() < 2)
9095     return SDValue();
9096 
9097   // Handle simple types only.
9098   LLVMContext &Context = *DAG.getContext();
9099   unsigned NumStores = Stores.size();
9100   unsigned WideNumBits = NumStores * NarrowNumBits;
9101   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9102   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
9103     return SDValue();
9104 
9105   // Check if all bytes of the source value that we are looking at are stored
9106   // to the same base address. Collect offsets from Base address into OffsetMap.
9107   SDValue SourceValue;
9108   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9109   int64_t FirstOffset = INT64_MAX;
9110   StoreSDNode *FirstStore = nullptr;
9111   std::optional<BaseIndexOffset> Base;
9112   for (auto *Store : Stores) {
9113     // All the stores store different parts of the combined wide value. A
9114     // truncate is required to get each partial value.
9115     SDValue Trunc = Store->getValue();
9116     if (Trunc.getOpcode() != ISD::TRUNCATE)
9117       return SDValue();
9118     // Other than the first/last part, a shift operation is required to get the
9119     // offset.
9120     int64_t Offset = 0;
9121     SDValue WideVal = Trunc.getOperand(0);
9122     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9123         isa<ConstantSDNode>(WideVal.getOperand(1))) {
9124       // The shift amount must be a constant multiple of the narrow type width.
9125       // It is translated to the offset within the wide source value "y".
9126       //
9127       // x = srl y, ShiftAmtC
9128       // i8 z = trunc x
9129       // store z, ...
9130       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9131       if (ShiftAmtC % NarrowNumBits != 0)
9132         return SDValue();
9133 
9134       // Make sure we aren't reading bits that are shifted in.
9135       if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9136         return SDValue();
9137 
9138       Offset = ShiftAmtC / NarrowNumBits;
9139       WideVal = WideVal.getOperand(0);
9140     }
9141 
9142     // Stores must share the same source value with different offsets.
9143     if (!SourceValue)
9144       SourceValue = WideVal;
9145     else if (SourceValue != WideVal) {
9146       // Truncate and extends can be stripped to see if the values are related.
9147       if (stripTruncAndExt(SourceValue) != WideVal &&
9148           stripTruncAndExt(WideVal) != SourceValue)
9149         return SDValue();
9150 
9151       if (WideVal.getScalarValueSizeInBits() >
9152           SourceValue.getScalarValueSizeInBits())
9153         SourceValue = WideVal;
9154 
9155       // Give up if the source value type is smaller than the store size.
9156       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
9157         return SDValue();
9158     }
9159 
9160     // Stores must share the same base address.
9161     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9162     int64_t ByteOffsetFromBase = 0;
9163     if (!Base)
9164       Base = Ptr;
9165     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9166       return SDValue();
9167 
9168     // Remember the first store.
9169     if (ByteOffsetFromBase < FirstOffset) {
9170       FirstStore = Store;
9171       FirstOffset = ByteOffsetFromBase;
9172     }
9173     // Map the offset in the store to the offset in the combined value, and
9174     // return early if it has been set before.
9175     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9176       return SDValue();
9177     OffsetMap[Offset] = ByteOffsetFromBase;
9178   }
9179 
9180   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9181   assert(FirstStore && "First store must be set");
9182 
9183   // Check that a store of the wide type is both allowed and fast on the target
9184   const DataLayout &Layout = DAG.getDataLayout();
9185   unsigned Fast = 0;
9186   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9187                                         *FirstStore->getMemOperand(), &Fast);
9188   if (!Allowed || !Fast)
9189     return SDValue();
9190 
9191   // Check if the pieces of the value are going to the expected places in memory
9192   // to merge the stores.
9193   auto checkOffsets = [&](bool MatchLittleEndian) {
9194     if (MatchLittleEndian) {
9195       for (unsigned i = 0; i != NumStores; ++i)
9196         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9197           return false;
9198     } else { // MatchBigEndian by reversing loop counter.
9199       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9200         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9201           return false;
9202     }
9203     return true;
9204   };
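  // For example (an illustrative case, not taken from the code above): with
  // four i8 pieces (NumStores == 4, NarrowNumBits == 8) and first byte offset
  // F, a little-endian match requires OffsetMap == {F, F+1, F+2, F+3}, i.e.
  // piece i of the wide value is stored i bytes above the base; the reversed
  // map {F+3, F+2, F+1, F} matches the big-endian order instead.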
9205 
9206   // Check if the offsets line up for the native data layout of this target.
9207   bool NeedBswap = false;
9208   bool NeedRotate = false;
9209   if (!checkOffsets(Layout.isLittleEndian())) {
9210     // Special-case: check if byte offsets line up for the opposite endian.
9211     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9212       NeedBswap = true;
9213     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9214       NeedRotate = true;
9215     else
9216       return SDValue();
9217   }
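  // Illustration of the two special cases above: four i8 pieces stored in
  // fully reversed order form a BSWAP, while two i16 halves of an i32 stored
  // in swapped order amount to a rotate by half the width (ROTR by 16).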
9218 
9219   SDLoc DL(N);
9220   if (WideVT != SourceValue.getValueType()) {
9221     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9222            "Unexpected store value to merge");
9223     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9224   }
9225 
  // Before legalization we can introduce illegal bswaps/rotates, which will
  // later be converted to an explicit bswap sequence. This way we end up with
  // a single store and byte shuffling instead of several stores and byte
  // shuffling.
9229   if (NeedBswap) {
9230     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9231   } else if (NeedRotate) {
9232     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9233     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9234     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9235   }
9236 
9237   SDValue NewStore =
9238       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9239                    FirstStore->getPointerInfo(), FirstStore->getAlign());
9240 
9241   // Rely on other DAG combine rules to remove the other individual stores.
9242   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9243   return NewStore;
9244 }
9245 
9246 /// Match a pattern where a wide type scalar value is loaded by several narrow
9247 /// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
9249 ///
9250 /// Assuming little endian target:
9251 ///  i8 *a = ...
9252 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9253 /// =>
9254 ///  i32 val = *((i32)a)
9255 ///
9256 ///  i8 *a = ...
9257 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9258 /// =>
9259 ///  i32 val = BSWAP(*((i32)a))
9260 ///
9261 /// TODO: This rule matches complex patterns with OR node roots and doesn't
9262 /// interact well with the worklist mechanism. When a part of the pattern is
9263 /// updated (e.g. one of the loads) its direct users are put into the worklist,
9264 /// but the root node of the pattern which triggers the load combine is not
9265 /// necessarily a direct user of the changed node. For example, once the address
9266 /// of t28 load is reassociated load combine won't be triggered:
9267 ///             t25: i32 = add t4, Constant:i32<2>
9268 ///           t26: i64 = sign_extend t25
9269 ///        t27: i64 = add t2, t26
9270 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9271 ///     t29: i32 = zero_extend t28
9272 ///   t32: i32 = shl t29, Constant:i8<8>
9273 /// t33: i32 = or t23, t32
9274 /// As a possible fix visitLoad can check if the load can be a part of a load
9275 /// combine pattern and add corresponding OR roots to the worklist.
9276 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9277   assert(N->getOpcode() == ISD::OR &&
9278          "Can only match load combining against OR nodes");
9279 
9280   // Handles simple types only
9281   EVT VT = N->getValueType(0);
9282   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9283     return SDValue();
9284   unsigned ByteWidth = VT.getSizeInBits() / 8;
9285 
9286   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9287   auto MemoryByteOffset = [&](SDByteProvider P) {
9288     assert(P.hasSrc() && "Must be a memory byte provider");
9289     auto *Load = cast<LoadSDNode>(P.Src.value());
9290 
9291     unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9292 
9293     assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes, not bits");
9295     unsigned LoadByteWidth = LoadBitWidth / 8;
9296     return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9297                              : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9298   };
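  // Sketch of the endianness mapping: for a 4-byte load, byte 0 of the value
  // lives at memory offset 0 on a little-endian target but at offset 3 on a
  // big-endian one, which is what littleEndianByteAt / bigEndianByteAt
  // compute.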
9299 
9300   std::optional<BaseIndexOffset> Base;
9301   SDValue Chain;
9302 
9303   SmallPtrSet<LoadSDNode *, 8> Loads;
9304   std::optional<SDByteProvider> FirstByteProvider;
9305   int64_t FirstOffset = INT64_MAX;
9306 
9307   // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect byte offsets from Base address into ByteOffsets.
9309   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9310   unsigned ZeroExtendedBytes = 0;
9311   for (int i = ByteWidth - 1; i >= 0; --i) {
9312     auto P =
9313         calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9314                               /*StartingIndex*/ i);
9315     if (!P)
9316       return SDValue();
9317 
9318     if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0; we can just
9320       // zero-extend the load.
9321       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9322         return SDValue();
9323       continue;
9324     }
9325     assert(P->hasSrc() && "provenance should either be memory or zero");
9326     auto *L = cast<LoadSDNode>(P->Src.value());
9327 
9328     // All loads must share the same chain
9329     SDValue LChain = L->getChain();
9330     if (!Chain)
9331       Chain = LChain;
9332     else if (Chain != LChain)
9333       return SDValue();
9334 
9335     // Loads must share the same base address
9336     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9337     int64_t ByteOffsetFromBase = 0;
9338 
9339     // For vector loads, the expected load combine pattern will have an
9340     // ExtractElement for each index in the vector. While each of these
9341     // ExtractElements will be accessing the same base address as determined
9342     // by the load instruction, the actual bytes they interact with will differ
9343     // due to different ExtractElement indices. To accurately determine the
9344     // byte position of an ExtractElement, we offset the base load ptr with
9345     // the index multiplied by the byte size of each element in the vector.
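    // For example (hypothetical), a byte provided by
    // (extractelt (load <4 x i8> from %p), 2) effectively comes from %p + 2,
    // so SrcOffset == 2 contributes two bytes here.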
9346     if (L->getMemoryVT().isVector()) {
9347       unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9348       if (LoadWidthInBit % 8 != 0)
9349         return SDValue();
9350       unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9351       Ptr.addToOffset(ByteOffsetFromVector);
9352     }
9353 
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9358       return SDValue();
9359 
9360     // Calculate the offset of the current byte from the base address
9361     ByteOffsetFromBase += MemoryByteOffset(*P);
9362     ByteOffsets[i] = ByteOffsetFromBase;
9363 
9364     // Remember the first byte load
9365     if (ByteOffsetFromBase < FirstOffset) {
9366       FirstByteProvider = P;
9367       FirstOffset = ByteOffsetFromBase;
9368     }
9369 
9370     Loads.insert(L);
9371   }
9372 
9373   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9374          "memory, so there must be at least one load which produces the value");
9375   assert(Base && "Base address of the accessed memory location must be set");
9376   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9377 
9378   bool NeedsZext = ZeroExtendedBytes > 0;
9379 
9380   EVT MemVT =
9381       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9382 
9383   if (!MemVT.isSimple())
9384     return SDValue();
9385 
  // Before legalization we can introduce too-wide illegal loads, which will
  // later be split into legal-sized loads. This enables us to combine
  // i64-load-by-i8 patterns into a couple of i32 loads on 32-bit targets.
9389   if (LegalOperations &&
9390       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
9391                             MemVT))
9392     return SDValue();
9393 
  // Check if the bytes of the OR we are looking at match either a big or
  // little endian value load.
9396   std::optional<bool> IsBigEndian = isBigEndian(
9397       ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9398   if (!IsBigEndian)
9399     return SDValue();
9400 
9401   assert(FirstByteProvider && "must be set");
9402 
  // Ensure that the first byte is loaded from the zero offset of the first
  // load, so the combined value can be loaded from the first load's address.
9405   if (MemoryByteOffset(*FirstByteProvider) != 0)
9406     return SDValue();
9407   auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9408 
  // The node we are looking at matches the pattern; check if we can replace
  // it with a single (possibly zero-extended) load and a bswap + shift if
  // needed.
9412 
  // If the load needs a byte swap, check if the target supports it.
9414   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9415 
  // Before legalization we can introduce illegal bswaps, which will later be
  // converted to an explicit bswap sequence. This way we end up with a single
9418   // load and byte shuffling instead of several loads and byte shuffling.
9419   // We do not introduce illegal bswaps when zero-extending as this tends to
9420   // introduce too many arithmetic instructions.
9421   if (NeedsBswap && (LegalOperations || NeedsZext) &&
9422       !TLI.isOperationLegal(ISD::BSWAP, VT))
9423     return SDValue();
9424 
9425   // If we need to bswap and zero extend, we have to insert a shift. Check that
9426   // it is legal.
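  // Illustrative i32 case: a zero-extending 2-byte load yields 0x0000ABCD;
  // shifting left by ZeroExtendedBytes * 8 == 16 gives 0xABCD0000, and the
  // i32 bswap then produces 0x0000CDAB, i.e. the byte-swapped i16 value
  // zero-extended as required.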
9427   if (NeedsBswap && NeedsZext && LegalOperations &&
9428       !TLI.isOperationLegal(ISD::SHL, VT))
9429     return SDValue();
9430 
9431   // Check that a load of the wide type is both allowed and fast on the target
9432   unsigned Fast = 0;
9433   bool Allowed =
9434       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9435                              *FirstLoad->getMemOperand(), &Fast);
9436   if (!Allowed || !Fast)
9437     return SDValue();
9438 
9439   SDValue NewLoad =
9440       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9441                      Chain, FirstLoad->getBasePtr(),
9442                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9443 
9444   // Transfer chain users from old loads to the new load.
9445   for (LoadSDNode *L : Loads)
9446     DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9447 
9448   if (!NeedsBswap)
9449     return NewLoad;
9450 
9451   SDValue ShiftedLoad =
9452       NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9453                               DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9454                                                          VT, SDLoc(N)))
9455                 : NewLoad;
9456   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9457 }
9458 
9459 // If the target has andn, bsl, or a similar bit-select instruction,
9460 // we want to unfold masked merge, with canonical pattern of:
9461 //   |        A  |  |B|
9462 //   ((x ^ y) & m) ^ y
9463 //    |  D  |
9464 // Into:
9465 //   (x & m) | (y & ~m)
9466 // If y is a constant, m is not a 'not', and the 'andn' does not work with
9467 // immediates, we unfold into a different pattern:
9468 //   ~(~x & m) & (m | y)
9469 // If x is a constant, m is a 'not', and the 'andn' does not work with
9470 // immediates, we unfold into a different pattern:
9471 //   (x | ~m) & ~(~m & ~y)
9472 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9473 //       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG.
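// A worked example (illustrative): with x = 0b1100, y = 0b1010 and
// m = 0b0110, x ^ y = 0b0110, (x ^ y) & m = 0b0110, and xor'ing y back in
// gives 0b1100: the bits of x where m is set and the bits of y elsewhere,
// which is exactly (x & m) | (y & ~m) = 0b0100 | 0b1000.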
9475 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9476   assert(N->getOpcode() == ISD::XOR);
9477 
9478   // Don't touch 'not' (i.e. where y = -1).
9479   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9480     return SDValue();
9481 
9482   EVT VT = N->getValueType(0);
9483 
9484   // There are 3 commutable operators in the pattern,
9485   // so we have to deal with 8 possible variants of the basic pattern.
9486   SDValue X, Y, M;
9487   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9488     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9489       return false;
9490     SDValue Xor = And.getOperand(XorIdx);
9491     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9492       return false;
9493     SDValue Xor0 = Xor.getOperand(0);
9494     SDValue Xor1 = Xor.getOperand(1);
9495     // Don't touch 'not' (i.e. where y = -1).
9496     if (isAllOnesOrAllOnesSplat(Xor1))
9497       return false;
9498     if (Other == Xor0)
9499       std::swap(Xor0, Xor1);
9500     if (Other != Xor1)
9501       return false;
9502     X = Xor0;
9503     Y = Xor1;
9504     M = And.getOperand(XorIdx ? 0 : 1);
9505     return true;
9506   };
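  // To see how all 8 variants are covered: the two operand orders of the
  // inner xor are handled by the swap inside the lambda, and the four calls
  // below cover the operand orders of the outer xor and of the and.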
9507 
9508   SDValue N0 = N->getOperand(0);
9509   SDValue N1 = N->getOperand(1);
9510   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9511       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9512     return SDValue();
9513 
9514   // Don't do anything if the mask is constant. This should not be reachable.
9515   // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
9517   if (isa<ConstantSDNode>(M.getNode()))
9518     return SDValue();
9519 
9520   // We can transform if the target has AndNot
9521   if (!TLI.hasAndNot(M))
9522     return SDValue();
9523 
9524   SDLoc DL(N);
9525 
  // If Y is a constant, check that 'andn' works with immediates, unless M is
  // a bitwise not, which would already allow ANDN to be used.
9528   if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9529     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9530     // If not, we need to do a bit more work to make sure andn is still used.
9531     SDValue NotX = DAG.getNOT(DL, X, VT);
9532     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9533     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9534     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9535     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9536   }
9537 
9538   // If X is a constant and M is a bitwise not, check that 'andn' works with
9539   // immediates.
9540   if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9541     assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9542     // If not, we need to do a bit more work to make sure andn is still used.
9543     SDValue NotM = M.getOperand(0);
9544     SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9545     SDValue NotY = DAG.getNOT(DL, Y, VT);
9546     SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9547     SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9548     return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9549   }
9550 
9551   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9552   SDValue NotM = DAG.getNOT(DL, M, VT);
9553   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9554 
9555   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9556 }
9557 
9558 SDValue DAGCombiner::visitXOR(SDNode *N) {
9559   SDValue N0 = N->getOperand(0);
9560   SDValue N1 = N->getOperand(1);
9561   EVT VT = N0.getValueType();
9562   SDLoc DL(N);
9563 
9564   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9565   if (N0.isUndef() && N1.isUndef())
9566     return DAG.getConstant(0, DL, VT);
9567 
9568   // fold (xor x, undef) -> undef
9569   if (N0.isUndef())
9570     return N0;
9571   if (N1.isUndef())
9572     return N1;
9573 
9574   // fold (xor c1, c2) -> c1^c2
9575   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9576     return C;
9577 
9578   // canonicalize constant to RHS
9579   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9580       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9581     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9582 
9583   // fold vector ops
9584   if (VT.isVector()) {
9585     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9586       return FoldedVOp;
9587 
9588     // fold (xor x, 0) -> x, vector edition
9589     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9590       return N0;
9591   }
9592 
9593   // fold (xor x, 0) -> x
9594   if (isNullConstant(N1))
9595     return N0;
9596 
9597   if (SDValue NewSel = foldBinOpIntoSelect(N))
9598     return NewSel;
9599 
9600   // reassociate xor
9601   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9602     return RXOR;
9603 
9604   // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9605   if (SDValue SD =
9606           reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9607     return SD;
9608 
9609   // fold (a^b) -> (a|b) iff a and b share no bits.
9610   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9611       DAG.haveNoCommonBitsSet(N0, N1))
9612     return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9613 
9614   // look for 'add-like' folds:
9615   // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9616   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9617       isMinSignedConstant(N1))
9618     if (SDValue Combined = visitADDLike(N))
9619       return Combined;
9620 
9621   // fold !(x cc y) -> (x !cc y)
9622   unsigned N0Opcode = N0.getOpcode();
9623   SDValue LHS, RHS, CC;
9624   if (TLI.isConstTrueVal(N1) &&
9625       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9626     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9627                                                LHS.getValueType());
9628     if (!LegalOperations ||
9629         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9630       switch (N0Opcode) {
9631       default:
9632         llvm_unreachable("Unhandled SetCC Equivalent!");
9633       case ISD::SETCC:
9634         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9635       case ISD::SELECT_CC:
9636         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9637                                N0.getOperand(3), NotCC);
9638       case ISD::STRICT_FSETCC:
9639       case ISD::STRICT_FSETCCS: {
9640         if (N0.hasOneUse()) {
9641           // FIXME Can we handle multiple uses? Could we token factor the chain
9642           // results from the new/old setcc?
9643           SDValue SetCC =
9644               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9645                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9646           CombineTo(N, SetCC);
9647           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9648           recursivelyDeleteUnusedNodes(N0.getNode());
9649           return SDValue(N, 0); // Return N so it doesn't get rechecked!
9650         }
9651         break;
9652       }
9653       }
9654     }
9655   }
9656 
9657   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9658   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9659       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9660     SDValue V = N0.getOperand(0);
9661     SDLoc DL0(N0);
9662     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9663                     DAG.getConstant(1, DL0, V.getValueType()));
9664     AddToWorklist(V.getNode());
9665     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9666   }
9667 
9668   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9669   // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9670   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9671       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9672     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9673     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9674       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9675       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9676       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9677       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9678       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9679     }
9680   }
9681   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9682   // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9683   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9684       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9685     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9686     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9687       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9688       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9689       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9690       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9691       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9692     }
9693   }
9694 
9695   // fold (not (neg x)) -> (add X, -1)
9696   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9697   // Y is a constant or the subtract has a single use.
9698   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9699       isNullConstant(N0.getOperand(0))) {
9700     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9701                        DAG.getAllOnesConstant(DL, VT));
9702   }
9703 
9704   // fold (not (add X, -1)) -> (neg X)
9705   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9706       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9707     return DAG.getNegative(N0.getOperand(0), DL, VT);
9708   }
9709 
9710   // fold (xor (and x, y), y) -> (and (not x), y)
9711   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9712     SDValue X = N0.getOperand(0);
9713     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9714     AddToWorklist(NotX.getNode());
9715     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9716   }
9717 
9718   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
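  // This is the classic branchless abs: Y is 0 when X is non-negative and -1
  // when X is negative, so (X + Y) ^ Y either leaves X unchanged or computes
  // -X. E.g. for i32 X = -5: Y = -1, X + Y = -6, and -6 ^ -1 = 5.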
9719   if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
9720     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9721     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9722     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9723       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9724       SDValue S0 = S.getOperand(0);
9725       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9726         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9727           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9728             return DAG.getNode(ISD::ABS, DL, VT, S0);
9729     }
9730   }
9731 
9732   // fold (xor x, x) -> 0
9733   if (N0 == N1)
9734     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9735 
9736   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9737   // Here is a concrete example of this equivalence:
9738   // i16   x ==  14
9739   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
9740   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9741   //
9742   // =>
9743   //
9744   // i16     ~1      == 0b1111111111111110
9745   // i16 rol(~1, 14) == 0b1011111111111111
9746   //
9747   // Some additional tips to help conceptualize this transform:
9748   // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would make the result equal to zero.
9750   // - Values of x larger than the bitwidth are undefined and do not require a
9751   //   consistent result.
  // - Pushing the zero left requires shifting one-bits in from the right.
9753   // A rotate left of ~1 is a nice way of achieving the desired result.
9754   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9755       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9756     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
9757                        N0.getOperand(1));
9758   }
9759 
9760   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
9761   if (N0Opcode == N1.getOpcode())
9762     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9763       return V;
9764 
9765   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9766     return R;
9767   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9768     return R;
9769   if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9770     return R;
9771 
9772   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
9773   if (SDValue MM = unfoldMaskedMerge(N))
9774     return MM;
9775 
9776   // Simplify the expression using non-local knowledge.
9777   if (SimplifyDemandedBits(SDValue(N, 0)))
9778     return SDValue(N, 0);
9779 
9780   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9781     return Combined;
9782 
9783   return SDValue();
9784 }
9785 
9786 /// If we have a shift-by-constant of a bitwise logic op that itself has a
9787 /// shift-by-constant operand with identical opcode, we may be able to convert
9788 /// that into 2 independent shifts followed by the logic op. This is a
9789 /// throughput improvement.
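/// An illustrative instance of the transform below:
///   srl (xor (srl x, 2), y), 3 -> xor (srl x, 5), (srl y, 3)
/// The two resulting shifts do not depend on each other, so they can execute
/// in parallel.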
9790 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9791   // Match a one-use bitwise logic op.
9792   SDValue LogicOp = Shift->getOperand(0);
9793   if (!LogicOp.hasOneUse())
9794     return SDValue();
9795 
9796   unsigned LogicOpcode = LogicOp.getOpcode();
9797   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9798       LogicOpcode != ISD::XOR)
9799     return SDValue();
9800 
9801   // Find a matching one-use shift by constant.
9802   unsigned ShiftOpcode = Shift->getOpcode();
9803   SDValue C1 = Shift->getOperand(1);
9804   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9805   assert(C1Node && "Expected a shift with constant operand");
9806   const APInt &C1Val = C1Node->getAPIntValue();
9807   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9808                              const APInt *&ShiftAmtVal) {
9809     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9810       return false;
9811 
9812     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9813     if (!ShiftCNode)
9814       return false;
9815 
9816     // Capture the shifted operand and shift amount value.
9817     ShiftOp = V.getOperand(0);
9818     ShiftAmtVal = &ShiftCNode->getAPIntValue();
9819 
9820     // Shift amount types do not have to match their operand type, so check that
9821     // the constants are the same width.
9822     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9823       return false;
9824 
9825     // The fold is not valid if the sum of the shift values doesn't fit in the
9826     // given shift amount type.
9827     bool Overflow = false;
9828     APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9829     if (Overflow)
9830       return false;
9831 
9832     // The fold is not valid if the sum of the shift values exceeds bitwidth.
9833     if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9834       return false;
9835 
9836     return true;
9837   };
9838 
9839   // Logic ops are commutative, so check each operand for a match.
9840   SDValue X, Y;
9841   const APInt *C0Val;
9842   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9843     Y = LogicOp.getOperand(1);
9844   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9845     Y = LogicOp.getOperand(0);
9846   else
9847     return SDValue();
9848 
9849   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9850   SDLoc DL(Shift);
9851   EVT VT = Shift->getValueType(0);
9852   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9853   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9854   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9855   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9856   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9857                      LogicOp->getFlags());
9858 }
9859 
9860 /// Handle transforms common to the three shifts, when the shift amount is a
9861 /// constant.
9862 /// We are looking for: (shift being one of shl/sra/srl)
9863 ///   shift (binop X, C0), C1
9864 /// And want to transform into:
9865 ///   binop (shift X, C1), (shift C0, C1)
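/// For example (illustrative): (shl (and X, 255), 8) becomes
/// (and (shl X, 8), (shl 255, 8)), i.e. (and (shl X, 8), 65280).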
9866 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9867   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9868 
9869   // Do not turn a 'not' into a regular xor.
9870   if (isBitwiseNot(N->getOperand(0)))
9871     return SDValue();
9872 
9873   // The inner binop must be one-use, since we want to replace it.
9874   SDValue LHS = N->getOperand(0);
9875   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9876     return SDValue();
9877 
9878   // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9879   if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9880     return R;
9881 
9882   // We want to pull some binops through shifts, so that we have (and (shift))
9883   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
9884   // thing happens with address calculations, so it's important to canonicalize
9885   // it.
9886   switch (LHS.getOpcode()) {
9887   default:
9888     return SDValue();
9889   case ISD::OR:
9890   case ISD::XOR:
9891   case ISD::AND:
9892     break;
9893   case ISD::ADD:
9894     if (N->getOpcode() != ISD::SHL)
9895       return SDValue(); // only shl(add) not sr[al](add).
9896     break;
9897   }
9898 
  // FIXME: This is disabled unless the input to the binop is a shift by a
  // constant or is a copy/select. Enable it for other cases once we figure
  // out when it is actually profitable.
9902   SDValue BinOpLHSVal = LHS.getOperand(0);
9903   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9904                             BinOpLHSVal.getOpcode() == ISD::SRA ||
9905                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
9906                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9907   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9908                         BinOpLHSVal.getOpcode() == ISD::SELECT;
9909 
9910   if (!IsShiftByConstant && !IsCopyOrSelect)
9911     return SDValue();
9912 
9913   if (IsCopyOrSelect && N->hasOneUse())
9914     return SDValue();
9915 
9916   // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9917   SDLoc DL(N);
9918   EVT VT = N->getValueType(0);
9919   if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9920           N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9921     SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9922                                    N->getOperand(1));
9923     return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9924   }
9925 
9926   return SDValue();
9927 }
9928 
9929 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9930   assert(N->getOpcode() == ISD::TRUNCATE);
9931   assert(N->getOperand(0).getOpcode() == ISD::AND);
9932 
9933   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9934   EVT TruncVT = N->getValueType(0);
9935   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9936       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9937     SDValue N01 = N->getOperand(0).getOperand(1);
9938     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9939       SDLoc DL(N);
9940       SDValue N00 = N->getOperand(0).getOperand(0);
9941       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9942       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9943       AddToWorklist(Trunc00.getNode());
9944       AddToWorklist(Trunc01.getNode());
9945       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9946     }
9947   }
9948 
9949   return SDValue();
9950 }
9951 
9952 SDValue DAGCombiner::visitRotate(SDNode *N) {
9953   SDLoc dl(N);
9954   SDValue N0 = N->getOperand(0);
9955   SDValue N1 = N->getOperand(1);
9956   EVT VT = N->getValueType(0);
9957   unsigned Bitsize = VT.getScalarSizeInBits();
9958 
9959   // fold (rot x, 0) -> x
9960   if (isNullOrNullSplat(N1))
9961     return N0;
9962 
9963   // fold (rot x, c) -> x iff (c % BitSize) == 0
9964   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9965     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9966     if (DAG.MaskedValueIsZero(N1, ModuloMask))
9967       return N0;
9968   }
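  // E.g. for i32 (Bitsize == 32), if the low 5 bits of the rotate amount are
  // known to be zero, the amount is a multiple of 32 and the rotate is a
  // no-op.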
9969 
9970   // fold (rot x, c) -> (rot x, c % BitSize)
9971   bool OutOfRange = false;
9972   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9973     OutOfRange |= C->getAPIntValue().uge(Bitsize);
9974     return true;
9975   };
9976   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9977     EVT AmtVT = N1.getValueType();
9978     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9979     if (SDValue Amt =
9980             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9981       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9982   }
9983 
9984   // rot i16 X, 8 --> bswap X
9985   auto *RotAmtC = isConstOrConstSplat(N1);
9986   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9987       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9988     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9989 
9990   // Simplify the operands using demanded-bits information.
9991   if (SimplifyDemandedBits(SDValue(N, 0)))
9992     return SDValue(N, 0);
9993 
9994   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9995   if (N1.getOpcode() == ISD::TRUNCATE &&
9996       N1.getOperand(0).getOpcode() == ISD::AND) {
9997     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9998       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9999   }
10000 
10001   unsigned NextOp = N0.getOpcode();
10002 
10003   // fold (rot* (rot* x, c2), c1)
10004   //   -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
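  // Worked i32 example: rotl (rotr x, 5), 3 -> rotl x, (3 - 5 + 32) % 32,
  // i.e. rotl x, 30, consistent with rotr x, 5 being the same as rotl x, 27.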
10005   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10006     bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10007     bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10008     if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10009       EVT ShiftVT = N1.getValueType();
10010       bool SameSide = (N->getOpcode() == NextOp);
10011       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10012       SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10013       SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10014                                                  {N1, BitsizeC});
10015       SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10016                                                  {N0.getOperand(1), BitsizeC});
10017       if (Norm1 && Norm2)
10018         if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10019                 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10020           CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10021                                                      {CombinedShift, BitsizeC});
10022           SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10023               ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10024           return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10025                              CombinedShiftNorm);
10026         }
10027     }
10028   }
10029   return SDValue();
10030 }
10031 
10032 SDValue DAGCombiner::visitSHL(SDNode *N) {
10033   SDValue N0 = N->getOperand(0);
10034   SDValue N1 = N->getOperand(1);
10035   if (SDValue V = DAG.simplifyShift(N0, N1))
10036     return V;
10037 
10038   SDLoc DL(N);
10039   EVT VT = N0.getValueType();
10040   EVT ShiftVT = N1.getValueType();
10041   unsigned OpSizeInBits = VT.getScalarSizeInBits();
10042 
10043   // fold (shl c1, c2) -> c1<<c2
10044   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10045     return C;
10046 
10047   // fold vector ops
10048   if (VT.isVector()) {
10049     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10050       return FoldedVOp;
10051 
10052     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces an all-ones true value then:
10054     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10055     if (N1CV && N1CV->isConstant()) {
10056       if (N0.getOpcode() == ISD::AND) {
10057         SDValue N00 = N0->getOperand(0);
10058         SDValue N01 = N0->getOperand(1);
10059         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10060 
10061         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10062             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10063                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10064           if (SDValue C =
10065                   DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10066             return DAG.getNode(ISD::AND, DL, VT, N00, C);
10067         }
10068       }
10069     }
10070   }
10071 
10072   if (SDValue NewSel = foldBinOpIntoSelect(N))
10073     return NewSel;
10074 
10075   // if (shl x, c) is known to be zero, return 0
10076   if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10077     return DAG.getConstant(0, DL, VT);
10078 
10079   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10080   if (N1.getOpcode() == ISD::TRUNCATE &&
10081       N1.getOperand(0).getOpcode() == ISD::AND) {
10082     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10083       return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10084   }
10085 
10086   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10087   if (N0.getOpcode() == ISD::SHL) {
10088     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10089                                           ConstantSDNode *RHS) {
10090       APInt c1 = LHS->getAPIntValue();
10091       APInt c2 = RHS->getAPIntValue();
10092       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10093       return (c1 + c2).uge(OpSizeInBits);
10094     };
10095     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10096       return DAG.getConstant(0, DL, VT);
10097 
10098     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10099                                        ConstantSDNode *RHS) {
10100       APInt c1 = LHS->getAPIntValue();
10101       APInt c2 = RHS->getAPIntValue();
10102       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10103       return (c1 + c2).ult(OpSizeInBits);
10104     };
10105     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10106       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10107       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10108     }
10109   }
10110 
10111   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10112   // For this to be valid, the second form must not preserve any of the bits
10113   // that are shifted out by the inner shift in the first form.  This means
10114   // the outer shift size must be >= the number of bits added by the ext.
10115   // As a corollary, we don't care what kind of ext it is.
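  // Sketch with hypothetical values: for i8 x zero-extended to i32,
  // (shl (zext (shl x, 2)), 26) keeps only bits 0..3 of x, placed at bit
  // positions 28..31, which is the same result as (shl (zext x), 28): the
  // outer shift amount (26) covers the 24 bits added by the extension.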
10116   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10117        N0.getOpcode() == ISD::ANY_EXTEND ||
10118        N0.getOpcode() == ISD::SIGN_EXTEND) &&
10119       N0.getOperand(0).getOpcode() == ISD::SHL) {
10120     SDValue N0Op0 = N0.getOperand(0);
10121     SDValue InnerShiftAmt = N0Op0.getOperand(1);
10122     EVT InnerVT = N0Op0.getValueType();
10123     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10124 
10125     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10126                                                          ConstantSDNode *RHS) {
10127       APInt c1 = LHS->getAPIntValue();
10128       APInt c2 = RHS->getAPIntValue();
10129       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10130       return c2.uge(OpSizeInBits - InnerBitwidth) &&
10131              (c1 + c2).uge(OpSizeInBits);
10132     };
10133     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10134                                   /*AllowUndefs*/ false,
10135                                   /*AllowTypeMismatch*/ true))
10136       return DAG.getConstant(0, DL, VT);
10137 
10138     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10139                                                       ConstantSDNode *RHS) {
10140       APInt c1 = LHS->getAPIntValue();
10141       APInt c2 = RHS->getAPIntValue();
10142       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10143       return c2.uge(OpSizeInBits - InnerBitwidth) &&
10144              (c1 + c2).ult(OpSizeInBits);
10145     };
10146     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10147                                   /*AllowUndefs*/ false,
10148                                   /*AllowTypeMismatch*/ true)) {
10149       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10150       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10151       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10152       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10153     }
10154   }
10155 
10156   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10157   // Only fold this if the inner zext has no other uses to avoid increasing
10158   // the total number of instructions.
10159   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10160       N0.getOperand(0).getOpcode() == ISD::SRL) {
10161     SDValue N0Op0 = N0.getOperand(0);
10162     SDValue InnerShiftAmt = N0Op0.getOperand(1);
10163 
10164     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10165       APInt c1 = LHS->getAPIntValue();
10166       APInt c2 = RHS->getAPIntValue();
10167       zeroExtendToMatch(c1, c2);
10168       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10169     };
10170     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10171                                   /*AllowUndefs*/ false,
10172                                   /*AllowTypeMismatch*/ true)) {
10173       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10174       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10175       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10176       AddToWorklist(NewSHL.getNode());
10177       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10178     }
10179   }
10180 
10181   if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10182     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10183                                            ConstantSDNode *RHS) {
10184       const APInt &LHSC = LHS->getAPIntValue();
10185       const APInt &RHSC = RHS->getAPIntValue();
10186       return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10187              LHSC.getZExtValue() <= RHSC.getZExtValue();
10188     };
10189 
10190     // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
    // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1 >= C2
10192     if (N0->getFlags().hasExact()) {
10193       if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10194                                     /*AllowUndefs*/ false,
10195                                     /*AllowTypeMismatch*/ true)) {
10196         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10197         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10198         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10199       }
10200       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10201                                     /*AllowUndefs*/ false,
10202                                     /*AllowTypeMismatch*/ true)) {
10203         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10204         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10205         return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10206       }
10207     }
10208 
    // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
    //                               (and (srl x, (sub c1, c2)), MASK)
10211     // Only fold this if the inner shift has no other uses -- if it does,
10212     // folding this will increase the total number of instructions.
10213     if (N0.getOpcode() == ISD::SRL &&
10214         (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10215         TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10216       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10217                                     /*AllowUndefs*/ false,
10218                                     /*AllowTypeMismatch*/ true)) {
10219         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10220         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10221         SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10222         Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10223         Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10224         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10225         return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10226       }
10227       if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10228                                     /*AllowUndefs*/ false,
10229                                     /*AllowTypeMismatch*/ true)) {
10230         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10231         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10232         SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10233         Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10234         SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10235         return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10236       }
10237     }
10238   }
10239 
10240   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10241   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10242       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10243     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10244     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10245     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10246   }
10247 
10248   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10249   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // This is a variant of the fold done on multiply, except that a mul by a
  // power of 2 is turned into a shift.
10252   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10253       TLI.isDesirableToCommuteWithShift(N, Level)) {
10254     SDValue N01 = N0.getOperand(1);
10255     if (SDValue Shl1 =
10256             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10257       SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10258       AddToWorklist(Shl0.getNode());
10259       SDNodeFlags Flags;
10260       // Preserve the disjoint flag for Or.
10261       if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10262         Flags |= SDNodeFlags::Disjoint;
10263       return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10264     }
10265   }
10266 
10267   // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10268   // TODO: Add zext/add_nuw variant with suitable test coverage
10269   // TODO: Should we limit this with isLegalAddImmediate?
10270   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10271       N0.getOperand(0).getOpcode() == ISD::ADD &&
10272       N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10273       TLI.isDesirableToCommuteWithShift(N, Level)) {
10274     SDValue Add = N0.getOperand(0);
10275     SDLoc DL(N0);
10276     if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10277                                                   {Add.getOperand(1)})) {
10278       if (SDValue ShlC =
10279               DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10280         SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10281         SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10282         return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10283       }
10284     }
10285   }
10286 
10287   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10288   if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10289     SDValue N01 = N0.getOperand(1);
10290     if (SDValue Shl =
10291             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10292       return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10293   }
10294 
10295   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10296   if (N1C && !N1C->isOpaque())
10297     if (SDValue NewSHL = visitShiftByConstant(N))
10298       return NewSHL;
10299 
10300   // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10301   // target.
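  // For non-zero Y, Y & -Y isolates the lowest set bit, i.e. it equals
  // 1 << cttz(Y), so the multiply reproduces the shift. E.g. Y = 0b0110
  // gives Y & -Y = 0b0010, and (shl X, cttz(Y)) == (shl X, 1) == X * 2.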
10302   if (((N1.getOpcode() == ISD::CTTZ &&
10303         VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10304        N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10305       N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10306       TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10307     SDValue Y = N1.getOperand(0);
10308     SDLoc DL(N);
10309     SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10310     SDValue And =
10311         DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10312     return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10313   }
10314 
10315   if (SimplifyDemandedBits(SDValue(N, 0)))
10316     return SDValue(N, 0);
10317 
10318   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10319   if (N0.getOpcode() == ISD::VSCALE && N1C) {
10320     const APInt &C0 = N0.getConstantOperandAPInt(0);
10321     const APInt &C1 = N1C->getAPIntValue();
10322     return DAG.getVScale(DL, VT, C0 << C1);
10323   }
10324 
10325   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10326   APInt ShlVal;
10327   if (N0.getOpcode() == ISD::STEP_VECTOR &&
10328       ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10329     const APInt &C0 = N0.getConstantOperandAPInt(0);
10330     if (ShlVal.ult(C0.getBitWidth())) {
10331       APInt NewStep = C0 << ShlVal;
10332       return DAG.getStepVector(DL, VT, NewStep);
10333     }
10334   }
10335 
10336   return SDValue();
10337 }
10338 
10339 // Transform a right shift of a multiply into a multiply-high.
10340 // Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10343 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10344                                   const TargetLowering &TLI) {
10345   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10346          "SRL or SRA node is required here!");
10347 
10348   // Check the shift amount. Proceed with the transformation if the shift
10349   // amount is constant.
10350   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10351   if (!ShiftAmtSrc)
10352     return SDValue();
10353 
10354   // The operation feeding into the shift must be a multiply.
10355   SDValue ShiftOperand = N->getOperand(0);
10356   if (ShiftOperand.getOpcode() != ISD::MUL)
10357     return SDValue();
10358 
10359   // Both operands must be equivalent extend nodes.
10360   SDValue LeftOp = ShiftOperand.getOperand(0);
10361   SDValue RightOp = ShiftOperand.getOperand(1);
10362 
10363   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10364   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10365 
10366   if (!IsSignExt && !IsZeroExt)
10367     return SDValue();
10368 
10369   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10370   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10371 
10372   // return true if U may use the lower bits of its operands
10373   auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10374     if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10375       return true;
10376     }
10377     ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10378     if (!UShiftAmtSrc) {
10379       return true;
10380     }
10381     unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10382     return UShiftAmt < NarrowVTSize;
10383   };
10384 
  // If the lower part of the MUL is also used and MUL_LOHI is supported, do
  // not introduce the MULH; MUL_LOHI can produce both halves at once.
10387   unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10388   if (!ShiftOperand.hasOneUse() &&
10389       TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10390       llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10391     return SDValue();
10392   }
10393 
10394   SDValue MulhRightOp;
10395   if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10396     unsigned ActiveBits = IsSignExt
10397                               ? Constant->getAPIntValue().getSignificantBits()
10398                               : Constant->getAPIntValue().getActiveBits();
10399     if (ActiveBits > NarrowVTSize)
10400       return SDValue();
10401     MulhRightOp = DAG.getConstant(
10402         Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10403         NarrowVT);
10404   } else {
10405     if (LeftOp.getOpcode() != RightOp.getOpcode())
10406       return SDValue();
10407     // Check that the two extend nodes are the same type.
10408     if (NarrowVT != RightOp.getOperand(0).getValueType())
10409       return SDValue();
10410     MulhRightOp = RightOp.getOperand(0);
10411   }
10412 
10413   EVT WideVT = LeftOp.getValueType();
10414   // Proceed with the transformation if the wide types match.
10415   assert((WideVT == RightOp.getValueType()) &&
10416          "Cannot have a multiply node with two different operand types.");
10417 
10418   // Proceed with the transformation if the wide type is twice as large
10419   // as the narrow type.
10420   if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10421     return SDValue();
10422 
10423   // Check the shift amount with the narrow type size.
10424   // Proceed with the transformation if the shift amount is the width
10425   // of the narrow type.
10426   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10427   if (ShiftAmt != NarrowVTSize)
10428     return SDValue();
10429 
10430   // If the operation feeding into the MUL is a sign extend (sext),
10431   // we use mulhs. Othewise, zero extends (zext) use mulhu.
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10433 
  // Combine to mulh if mulh is legal/custom for the narrow type on the
  // target; for a vector type, we may instead transform to an acceptable type
  // and rely on legalization to split/combine the result.
10437   if (NarrowVT.isVector()) {
10438     EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10439     if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10440         !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10441       return SDValue();
10442   } else {
10443     if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10444       return SDValue();
10445   }
10446 
10447   SDValue Result =
10448       DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10449   bool IsSigned = N->getOpcode() == ISD::SRA;
10450   return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10451 }
10452 
// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
// This helper function accepts SDNodes with opcode ISD::BSWAP or
// ISD::BITREVERSE.
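// e.g. (bswap (xor (bswap x), y)) -> (xor x, (bswap y))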
10455 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10456   unsigned Opcode = N->getOpcode();
10457   if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10458     return SDValue();
10459 
10460   SDValue N0 = N->getOperand(0);
10461   EVT VT = N->getValueType(0);
10462   SDLoc DL(N);
10463   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10464     SDValue OldLHS = N0.getOperand(0);
10465     SDValue OldRHS = N0.getOperand(1);
10466 
    // If both operands are bswap/bitreverse, ignore the multiuse restriction.
    // Otherwise we need to ensure logic_op and bswap/bitreverse(x) have one
    // use.
10469     if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10470       return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10471                          OldRHS.getOperand(0));
10472     }
10473 
10474     if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10475       SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10476       return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10477                          NewBitReorder);
10478     }
10479 
10480     if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10481       SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10482       return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10483                          OldRHS.getOperand(0));
10484     }
10485   }
10486   return SDValue();
10487 }
10488 
10489 SDValue DAGCombiner::visitSRA(SDNode *N) {
10490   SDValue N0 = N->getOperand(0);
10491   SDValue N1 = N->getOperand(1);
10492   if (SDValue V = DAG.simplifyShift(N0, N1))
10493     return V;
10494 
10495   SDLoc DL(N);
10496   EVT VT = N0.getValueType();
10497   unsigned OpSizeInBits = VT.getScalarSizeInBits();
10498 
  // fold (sra c1, c2) -> c1 >>s c2
10500   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10501     return C;
10502 
10503   // Arithmetic shifting an all-sign-bit value is a no-op.
10504   // fold (sra 0, x) -> 0
10505   // fold (sra -1, x) -> -1
10506   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10507     return N0;
10508 
10509   // fold vector ops
10510   if (VT.isVector())
10511     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10512       return FoldedVOp;
10513 
10514   if (SDValue NewSel = foldBinOpIntoSelect(N))
10515     return NewSel;
10516 
10517   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10518 
10519   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10520   // clamp (add c1, c2) to max shift.
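  // e.g. i8: (sra (sra x, 3), 4) -> (sra x, 7), and since shift amounts
  // clamp, (sra (sra x, 6), 5) also becomes (sra x, 7).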
10521   if (N0.getOpcode() == ISD::SRA) {
10522     EVT ShiftVT = N1.getValueType();
10523     EVT ShiftSVT = ShiftVT.getScalarType();
10524     SmallVector<SDValue, 16> ShiftValues;
10525 
10526     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10527       APInt c1 = LHS->getAPIntValue();
10528       APInt c2 = RHS->getAPIntValue();
10529       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10530       APInt Sum = c1 + c2;
10531       unsigned ShiftSum =
10532           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10533       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10534       return true;
10535     };
10536     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10537       SDValue ShiftValue;
10538       if (N1.getOpcode() == ISD::BUILD_VECTOR)
10539         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10540       else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10541         assert(ShiftValues.size() == 1 &&
10542                "Expected matchBinaryPredicate to return one element for "
10543                "SPLAT_VECTORs");
10544         ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10545       } else
10546         ShiftValue = ShiftValues[0];
10547       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10548     }
10549   }
10550 
  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target, sext(shl) is likely to result in
  // better code.
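  // e.g. i32: (sra (shl X, 8), 24) -> (sext (trunc (srl X, 16) to i8))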
10556   if (N0.getOpcode() == ISD::SHL && N1C) {
10557     // Get the two constants of the shifts, CN0 = m, CN = n.
10558     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10559     if (N01C) {
10560       LLVMContext &Ctx = *DAG.getContext();
10561       // Determine what the truncate's result bitsize and type would be.
10562       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10563 
10564       if (VT.isVector())
10565         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10566 
10567       // Determine the residual right-shift amount.
10568       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10569 
      // If the shift is not a no-op (in which case this should be just a
      // sign extend already), sign_extend is legal on the truncated type,
      // and the truncate to that type is both legal and free, perform the
      // transform.
10574       if ((ShiftAmt > 0) &&
10575           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10576           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10577           TLI.isTruncateFree(VT, TruncVT)) {
10578         SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10579         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10580                                     N0.getOperand(0), Amt);
10581         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10582                                     Shift);
10583         return DAG.getNode(ISD::SIGN_EXTEND, DL,
10584                            N->getValueType(0), Trunc);
10585       }
10586     }
10587   }
10588 
10589   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10590   //   sra (add (shl X, N1C), AddC), N1C -->
10591   //   sext (add (trunc X to (width - N1C)), AddC')
10592   //   sra (sub AddC, (shl X, N1C)), N1C -->
  //   sext (sub AddC', (trunc X to (width - N1C)))
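  // e.g. i32: (sra (add (shl X, 16), AddC), 16)
  //   -> (sext (add (trunc X to i16), (trunc (srl AddC, 16))))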
10594   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10595       N0.hasOneUse()) {
10596     bool IsAdd = N0.getOpcode() == ISD::ADD;
10597     SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10598     if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10599         Shl.hasOneUse()) {
10600       // TODO: AddC does not need to be a splat.
10601       if (ConstantSDNode *AddC =
10602               isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10603         // Determine what the truncate's type would be and ask the target if
10604         // that is a free operation.
10605         LLVMContext &Ctx = *DAG.getContext();
10606         unsigned ShiftAmt = N1C->getZExtValue();
10607         EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10608         if (VT.isVector())
10609           TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10610 
10611         // TODO: The simple type check probably belongs in the default hook
10612         //       implementation and/or target-specific overrides (because
10613         //       non-simple types likely require masking when legalized), but
10614         //       that restriction may conflict with other transforms.
10615         if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10616             TLI.isTruncateFree(VT, TruncVT)) {
10617           SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10618           SDValue ShiftC =
10619               DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10620                                   TruncVT.getScalarSizeInBits()),
10621                               DL, TruncVT);
10622           SDValue Add;
10623           if (IsAdd)
10624             Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10625           else
10626             Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10627           return DAG.getSExtOrTrunc(Add, DL, VT);
10628         }
10629       }
10630     }
10631   }
10632 
10633   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10634   if (N1.getOpcode() == ISD::TRUNCATE &&
10635       N1.getOperand(0).getOpcode() == ISD::AND) {
10636     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10637       return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10638   }
10639 
10640   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10641   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10642   //      if c1 is equal to the number of bits the trunc removes
10643   // TODO - support non-uniform vector shift amounts.
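  // e.g. (sra (trunc (sra x:i64, 32) to i32), c)
  //   -> (trunc (sra x, (add c, 32)) to i32)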
10644   if (N0.getOpcode() == ISD::TRUNCATE &&
10645       (N0.getOperand(0).getOpcode() == ISD::SRL ||
10646        N0.getOperand(0).getOpcode() == ISD::SRA) &&
10647       N0.getOperand(0).hasOneUse() &&
10648       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10649     SDValue N0Op0 = N0.getOperand(0);
10650     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10651       EVT LargeVT = N0Op0.getValueType();
10652       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10653       if (LargeShift->getAPIntValue() == TruncBits) {
10654         EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10655         SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10656         Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10657                           DAG.getConstant(TruncBits, DL, LargeShiftVT));
10658         SDValue SRA =
10659             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10660         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10661       }
10662     }
10663   }
10664 
10665   // Simplify, based on bits shifted out of the LHS.
10666   if (SimplifyDemandedBits(SDValue(N, 0)))
10667     return SDValue(N, 0);
10668 
10669   // If the sign bit is known to be zero, switch this to a SRL.
10670   if (DAG.SignBitIsZero(N0))
10671     return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10672 
10673   if (N1C && !N1C->isOpaque())
10674     if (SDValue NewSRA = visitShiftByConstant(N))
10675       return NewSRA;
10676 
10677   // Try to transform this shift into a multiply-high if
10678   // it matches the appropriate pattern detected in combineShiftToMULH.
10679   if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10680     return MULH;
10681 
10682   // Attempt to convert a sra of a load into a narrower sign-extending load.
10683   if (SDValue NarrowLoad = reduceLoadWidth(N))
10684     return NarrowLoad;
10685 
10686   if (SDValue AVG = foldShiftToAvg(N))
10687     return AVG;
10688 
10689   return SDValue();
10690 }
10691 
10692 SDValue DAGCombiner::visitSRL(SDNode *N) {
10693   SDValue N0 = N->getOperand(0);
10694   SDValue N1 = N->getOperand(1);
10695   if (SDValue V = DAG.simplifyShift(N0, N1))
10696     return V;
10697 
10698   SDLoc DL(N);
10699   EVT VT = N0.getValueType();
10700   EVT ShiftVT = N1.getValueType();
10701   unsigned OpSizeInBits = VT.getScalarSizeInBits();
10702 
10703   // fold (srl c1, c2) -> c1 >>u c2
10704   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10705     return C;
10706 
10707   // fold vector ops
10708   if (VT.isVector())
10709     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10710       return FoldedVOp;
10711 
10712   if (SDValue NewSel = foldBinOpIntoSelect(N))
10713     return NewSel;
10714 
10715   // if (srl x, c) is known to be zero, return 0
10716   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10717   if (N1C &&
10718       DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10719     return DAG.getConstant(0, DL, VT);
10720 
10721   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
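  // e.g. i8: (srl (srl x, 3), 4) -> (srl x, 7), but (srl (srl x, 5), 4) -> 0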
10722   if (N0.getOpcode() == ISD::SRL) {
10723     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10724                                           ConstantSDNode *RHS) {
10725       APInt c1 = LHS->getAPIntValue();
10726       APInt c2 = RHS->getAPIntValue();
10727       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10728       return (c1 + c2).uge(OpSizeInBits);
10729     };
10730     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10731       return DAG.getConstant(0, DL, VT);
10732 
10733     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10734                                        ConstantSDNode *RHS) {
10735       APInt c1 = LHS->getAPIntValue();
10736       APInt c2 = RHS->getAPIntValue();
10737       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10738       return (c1 + c2).ult(OpSizeInBits);
10739     };
10740     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10741       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10742       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10743     }
10744   }
10745 
10746   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10747       N0.getOperand(0).getOpcode() == ISD::SRL) {
10748     SDValue InnerShift = N0.getOperand(0);
10749     // TODO - support non-uniform vector shift amounts.
10750     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10751       uint64_t c1 = N001C->getZExtValue();
10752       uint64_t c2 = N1C->getZExtValue();
10753       EVT InnerShiftVT = InnerShift.getValueType();
10754       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10755       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10756       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10757       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10758       if (c1 + OpSizeInBits == InnerShiftSize) {
10759         if (c1 + c2 >= InnerShiftSize)
10760           return DAG.getConstant(0, DL, VT);
10761         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10762         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10763                                        InnerShift.getOperand(0), NewShiftAmt);
10764         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10765       }
10766       // In the more general case, we can clear the high bits after the shift:
10767       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10768       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10769           c1 + c2 < InnerShiftSize) {
10770         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10771         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10772                                        InnerShift.getOperand(0), NewShiftAmt);
10773         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10774                                                             OpSizeInBits - c2),
10775                                        DL, InnerShiftVT);
10776         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10777         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10778       }
10779     }
10780   }
10781 
  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
  //                               (and (srl x, (sub c2, c1)), MASK)
10784   if (N0.getOpcode() == ISD::SHL &&
10785       (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10786       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10787     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10788                                            ConstantSDNode *RHS) {
10789       const APInt &LHSC = LHS->getAPIntValue();
10790       const APInt &RHSC = RHS->getAPIntValue();
10791       return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10792              LHSC.getZExtValue() <= RHSC.getZExtValue();
10793     };
10794     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10795                                   /*AllowUndefs*/ false,
10796                                   /*AllowTypeMismatch*/ true)) {
10797       SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10798       SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10799       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10800       Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10801       Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10802       SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10803       return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10804     }
10805     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10806                                   /*AllowUndefs*/ false,
10807                                   /*AllowTypeMismatch*/ true)) {
10808       SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10809       SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10810       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10811       Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10812       SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10813       return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10814     }
10815   }
10816 
10817   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10818   // TODO - support non-uniform vector shift amounts.
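  // e.g. (srl (any_extend x:i16 to i32), 4)
  //   -> (and (any_extend (srl x, 4)), 0x0FFFFFFF)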
10819   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10820     // Shifting in all undef bits?
10821     EVT SmallVT = N0.getOperand(0).getValueType();
10822     unsigned BitSize = SmallVT.getScalarSizeInBits();
10823     if (N1C->getAPIntValue().uge(BitSize))
10824       return DAG.getUNDEF(VT);
10825 
10826     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10827       uint64_t ShiftAmt = N1C->getZExtValue();
10828       SDLoc DL0(N0);
10829       SDValue SmallShift =
10830           DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
10831                       DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
10832       AddToWorklist(SmallShift.getNode());
10833       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10834       return DAG.getNode(ISD::AND, DL, VT,
10835                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10836                          DAG.getConstant(Mask, DL, VT));
10837     }
10838   }
10839 
10840   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
10841   // bit, which is unmodified by sra.
10842   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10843     if (N0.getOpcode() == ISD::SRA)
10844       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10845   }
10846 
  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and
  // x has a power of two bitwidth. The "5" represents (log2 (bitwidth x)).
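  // e.g. i32, if only bit 4 of x can be nonzero:
  //   (srl (ctlz x), 5) -> (xor (srl x, 4), 1)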
10849   if (N1C && N0.getOpcode() == ISD::CTLZ &&
10850       isPowerOf2_32(OpSizeInBits) &&
10851       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10852     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10853 
10854     // If any of the input bits are KnownOne, then the input couldn't be all
10855     // zeros, thus the result of the srl will always be zero.
10856     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10857 
    // If all of the bits input to the ctlz node are known to be zero, then
10859     // the result of the ctlz is "32" and the result of the shift is one.
10860     APInt UnknownBits = ~Known.Zero;
10861     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10862 
10863     // Otherwise, check to see if there is exactly one bit input to the ctlz.
10864     if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
10866       // could be set on input to the CTLZ node. If this bit is set, the SRL
10867       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10868       // to an SRL/XOR pair, which is likely to simplify more.
10869       unsigned ShAmt = UnknownBits.countr_zero();
10870       SDValue Op = N0.getOperand(0);
10871 
10872       if (ShAmt) {
10873         SDLoc DL(N0);
10874         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10875                          DAG.getShiftAmountConstant(ShAmt, VT, DL));
10876         AddToWorklist(Op.getNode());
10877       }
10878       return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10879     }
10880   }
10881 
10882   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10883   if (N1.getOpcode() == ISD::TRUNCATE &&
10884       N1.getOperand(0).getOpcode() == ISD::AND) {
10885     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10886       return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10887   }
10888 
10889   // fold operands of srl based on knowledge that the low bits are not
10890   // demanded.
10891   if (SimplifyDemandedBits(SDValue(N, 0)))
10892     return SDValue(N, 0);
10893 
10894   if (N1C && !N1C->isOpaque())
10895     if (SDValue NewSRL = visitShiftByConstant(N))
10896       return NewSRL;
10897 
10898   // Attempt to convert a srl of a load into a narrower zero-extending load.
10899   if (SDValue NarrowLoad = reduceLoadWidth(N))
10900     return NarrowLoad;
10901 
10902   // Here is a common situation. We want to optimize:
10903   //
10904   //   %a = ...
10905   //   %b = and i32 %a, 2
10906   //   %c = srl i32 %b, 1
10907   //   brcond i32 %c ...
10908   //
10909   // into
10910   //
10911   //   %a = ...
10912   //   %b = and %a, 2
10913   //   %c = setcc eq %b, 0
10914   //   brcond %c ...
10915   //
  // However, after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  //
  // This also tends to happen for binary operations when SimplifyDemandedBits
  // is involved.
  //
  // FIXME: This is unnecessary if we process the DAG in topological order,
10924   // which we plan to do. This workaround can be removed once the DAG is
10925   // processed in topological order.
10926   if (N->hasOneUse()) {
10927     SDNode *User = *N->user_begin();
10928 
    // Look past the truncate.
10930     if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
10931       User = *User->user_begin();
10932 
10933     if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
10934         User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
10935       AddToWorklist(User);
10936   }
10937 
10938   // Try to transform this shift into a multiply-high if
10939   // it matches the appropriate pattern detected in combineShiftToMULH.
10940   if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10941     return MULH;
10942 
10943   if (SDValue AVG = foldShiftToAvg(N))
10944     return AVG;
10945 
10946   return SDValue();
10947 }
10948 
10949 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10950   EVT VT = N->getValueType(0);
10951   SDValue N0 = N->getOperand(0);
10952   SDValue N1 = N->getOperand(1);
10953   SDValue N2 = N->getOperand(2);
10954   bool IsFSHL = N->getOpcode() == ISD::FSHL;
10955   unsigned BitWidth = VT.getScalarSizeInBits();
10956   SDLoc DL(N);
10957 
10958   // fold (fshl N0, N1, 0) -> N0
10959   // fold (fshr N0, N1, 0) -> N1
10960   if (isPowerOf2_32(BitWidth))
10961     if (DAG.MaskedValueIsZero(
10962             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10963       return IsFSHL ? N0 : N1;
10964 
10965   auto IsUndefOrZero = [](SDValue V) {
10966     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10967   };
10968 
10969   // TODO - support non-uniform vector shift amounts.
10970   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10971     EVT ShAmtTy = N2.getValueType();
10972 
10973     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
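    // e.g. i32: (fshl x, y, 37) -> (fshl x, y, 5)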
10974     if (Cst->getAPIntValue().uge(BitWidth)) {
10975       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10976       return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10977                          DAG.getConstant(RotAmt, DL, ShAmtTy));
10978     }
10979 
10980     unsigned ShAmt = Cst->getZExtValue();
10981     if (ShAmt == 0)
10982       return IsFSHL ? N0 : N1;
10983 
10984     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10985     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10986     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10987     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10988     if (IsUndefOrZero(N0))
10989       return DAG.getNode(
10990           ISD::SRL, DL, VT, N1,
10991           DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10992     if (IsUndefOrZero(N1))
10993       return DAG.getNode(
10994           ISD::SHL, DL, VT, N0,
10995           DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
10996 
10997     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10998     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10999     // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11001     // TODO - permit LHS EXTLOAD if extensions are shifted out.
11002     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11003         !DAG.getDataLayout().isBigEndian()) {
11004       auto *LHS = dyn_cast<LoadSDNode>(N0);
11005       auto *RHS = dyn_cast<LoadSDNode>(N1);
11006       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11007           LHS->getAddressSpace() == RHS->getAddressSpace() &&
11008           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
11009           ISD::isNON_EXTLoad(LHS)) {
11010         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11011           SDLoc DL(RHS);
11012           uint64_t PtrOff =
11013               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11014           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11015           unsigned Fast = 0;
11016           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11017                                      RHS->getAddressSpace(), NewAlign,
11018                                      RHS->getMemOperand()->getFlags(), &Fast) &&
11019               Fast) {
11020             SDValue NewPtr = DAG.getMemBasePlusOffset(
11021                 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11022             AddToWorklist(NewPtr.getNode());
11023             SDValue Load = DAG.getLoad(
11024                 VT, DL, RHS->getChain(), NewPtr,
11025                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11026                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11027             // Replace the old load's chain with the new load's chain.
11028             WorklistRemover DeadNodes(*this);
11029             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
11030             return Load;
11031           }
11032         }
11033       }
11034     }
11035   }
11036 
11037   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11038   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
11040   // TODO: when is it worth doing SUB(BW, N2) as well?
11041   if (isPowerOf2_32(BitWidth)) {
11042     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11043     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11044       return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11045     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11046       return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11047   }
11048 
11049   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11050   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11051   // TODO: Investigate flipping this rotate if only one is legal.
  // If funnel shift is legal as well, we might be better off avoiding
11053   // non-constant (BW - N2).
11054   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11055   if (N0 == N1 && hasOperation(RotOpc, VT))
11056     return DAG.getNode(RotOpc, DL, VT, N0, N2);
11057 
11058   // Simplify, based on bits shifted out of N0/N1.
11059   if (SimplifyDemandedBits(SDValue(N, 0)))
11060     return SDValue(N, 0);
11061 
11062   return SDValue();
11063 }
11064 
11065 SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11066   SDValue N0 = N->getOperand(0);
11067   SDValue N1 = N->getOperand(1);
11068   if (SDValue V = DAG.simplifyShift(N0, N1))
11069     return V;
11070 
11071   SDLoc DL(N);
11072   EVT VT = N0.getValueType();
11073 
11074   // fold (*shlsat c1, c2) -> c1<<c2
11075   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11076     return C;
11077 
11078   ConstantSDNode *N1C = isConstOrConstSplat(N1);
11079 
11080   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11081     // fold (sshlsat x, c) -> (shl x, c)
11082     if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11083         N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11084       return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11085 
11086     // fold (ushlsat x, c) -> (shl x, c)
11087     if (N->getOpcode() == ISD::USHLSAT && N1C &&
11088         N1C->getAPIntValue().ule(
11089             DAG.computeKnownBits(N0).countMinLeadingZeros()))
11090       return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11091   }
11092 
11093   return SDValue();
11094 }
11095 
// Given an ABS node, detect the following patterns:
11097 // (ABS (SUB (EXTEND a), (EXTEND b))).
11098 // (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates a UABD/SABD instruction.
11100 SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11101   EVT SrcVT = N->getValueType(0);
11102 
11103   if (N->getOpcode() == ISD::TRUNCATE)
11104     N = N->getOperand(0).getNode();
11105 
11106   if (N->getOpcode() != ISD::ABS)
11107     return SDValue();
11108 
11109   EVT VT = N->getValueType(0);
11110   SDValue AbsOp1 = N->getOperand(0);
11111   SDValue Op0, Op1;
11112 
11113   if (AbsOp1.getOpcode() != ISD::SUB)
11114     return SDValue();
11115 
11116   Op0 = AbsOp1.getOperand(0);
11117   Op1 = AbsOp1.getOperand(1);
11118 
11119   unsigned Opc0 = Op0.getOpcode();
11120 
11121   // Check if the operands of the sub are (zero|sign)-extended.
11122   // TODO: Should we use ValueTracking instead?
11123   if (Opc0 != Op1.getOpcode() ||
11124       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11125        Opc0 != ISD::SIGN_EXTEND_INREG)) {
11126     // fold (abs (sub nsw x, y)) -> abds(x, y)
11127     // Don't fold this for unsupported types as we lose the NSW handling.
11128     if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
11129         TLI.preferABDSToABSWithNSW(VT)) {
11130       SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11131       return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11132     }
11133     return SDValue();
11134   }
11135 
11136   EVT VT0, VT1;
11137   if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11138     VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11139     VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11140   } else {
11141     VT0 = Op0.getOperand(0).getValueType();
11142     VT1 = Op1.getOperand(0).getValueType();
11143   }
11144   unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11145 
11146   // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11147   // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
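  // e.g. (abs (sub (zext x:i8 to i32), (zext y:i8 to i32)))
  //   -> (zext (abdu x, y) to i32)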
11148   EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11149   if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11150       (VT1 == MaxVT || Op1->hasOneUse()) &&
11151       (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11152     SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11153                               DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11154                               DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11155     ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11156     return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11157   }
11158 
11159   // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11160   // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11161   if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11162     SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11163     return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11164   }
11165 
11166   return SDValue();
11167 }
11168 
11169 SDValue DAGCombiner::visitABS(SDNode *N) {
11170   SDValue N0 = N->getOperand(0);
11171   EVT VT = N->getValueType(0);
11172   SDLoc DL(N);
11173 
11174   // fold (abs c1) -> c2
11175   if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11176     return C;
11177   // fold (abs (abs x)) -> (abs x)
11178   if (N0.getOpcode() == ISD::ABS)
11179     return N0;
11180   // fold (abs x) -> x iff not-negative
11181   if (DAG.SignBitIsZero(N0))
11182     return N0;
11183 
11184   if (SDValue ABD = foldABSToABD(N, DL))
11185     return ABD;
11186 
11187   // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11188   // iff zero_extend/truncate are free.
11189   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11190     EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11191     if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11192         TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11193         hasOperation(ISD::ABS, ExtVT)) {
11194       return DAG.getNode(
11195           ISD::ZERO_EXTEND, DL, VT,
11196           DAG.getNode(ISD::ABS, DL, ExtVT,
11197                       DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11198     }
11199   }
11200 
11201   return SDValue();
11202 }
11203 
11204 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11205   SDValue N0 = N->getOperand(0);
11206   EVT VT = N->getValueType(0);
11207   SDLoc DL(N);
11208 
11209   // fold (bswap c1) -> c2
11210   if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11211     return C;
11212   // fold (bswap (bswap x)) -> x
11213   if (N0.getOpcode() == ISD::BSWAP)
11214     return N0.getOperand(0);
11215 
11216   // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11217   // isn't supported, it will be expanded to bswap followed by a manual reversal
11218   // of bits in each byte. By placing bswaps before bitreverse, we can remove
11219   // the two bswaps if the bitreverse gets expanded.
11220   if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11221     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11222     return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11223   }
11224 
  // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff c >= bw/2 (i.e. the lower half is known zero)
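  // e.g. i64: (bswap (shl x, 48)) -> (zext (bswap (trunc (shl x, 16) to i32)))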
11227   unsigned BW = VT.getScalarSizeInBits();
11228   if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11229     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11230     EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11231     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11232         ShAmt->getZExtValue() >= (BW / 2) &&
11233         (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11234         TLI.isTruncateFree(VT, HalfVT) &&
11235         (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11236       SDValue Res = N0.getOperand(0);
11237       if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11238         Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11239                           DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11240       Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11241       Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11242       return DAG.getZExtOrTrunc(Res, DL, VT);
11243     }
11244   }
11245 
11246   // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11247   // inverse-shift-of-bswap:
11248   // bswap (X u<< C) --> (bswap X) u>> C
11249   // bswap (X u>> C) --> (bswap X) u<< C
11250   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11251       N0.hasOneUse()) {
11252     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11253     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11254         ShAmt->getZExtValue() % 8 == 0) {
11255       SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11256       unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11257       return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11258     }
11259   }
11260 
11261   if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11262     return V;
11263 
11264   return SDValue();
11265 }
11266 
11267 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11268   SDValue N0 = N->getOperand(0);
11269   EVT VT = N->getValueType(0);
11270   SDLoc DL(N);
11271 
11272   // fold (bitreverse c1) -> c2
11273   if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11274     return C;
11275 
11276   // fold (bitreverse (bitreverse x)) -> x
11277   if (N0.getOpcode() == ISD::BITREVERSE)
11278     return N0.getOperand(0);
11279 
11280   SDValue X, Y;
11281 
11282   // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11283   if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11284       sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11285     return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11286 
11287   // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11288   if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11289       sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11290     return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11291 
11292   return SDValue();
11293 }
11294 
11295 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11296   SDValue N0 = N->getOperand(0);
11297   EVT VT = N->getValueType(0);
11298   SDLoc DL(N);
11299 
11300   // fold (ctlz c1) -> c2
11301   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11302     return C;
11303 
11304   // If the value is known never to be zero, switch to the undef version.
11305   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11306     if (DAG.isKnownNeverZero(N0))
11307       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11308 
11309   return SDValue();
11310 }
11311 
11312 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11313   SDValue N0 = N->getOperand(0);
11314   EVT VT = N->getValueType(0);
11315   SDLoc DL(N);
11316 
11317   // fold (ctlz_zero_undef c1) -> c2
11318   if (SDValue C =
11319           DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11320     return C;
11321   return SDValue();
11322 }
11323 
11324 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11325   SDValue N0 = N->getOperand(0);
11326   EVT VT = N->getValueType(0);
11327   SDLoc DL(N);
11328 
11329   // fold (cttz c1) -> c2
11330   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11331     return C;
11332 
11333   // If the value is known never to be zero, switch to the undef version.
11334   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11335     if (DAG.isKnownNeverZero(N0))
11336       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11337 
11338   return SDValue();
11339 }
11340 
11341 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11342   SDValue N0 = N->getOperand(0);
11343   EVT VT = N->getValueType(0);
11344   SDLoc DL(N);
11345 
11346   // fold (cttz_zero_undef c1) -> c2
11347   if (SDValue C =
11348           DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11349     return C;
11350   return SDValue();
11351 }
11352 
11353 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11354   SDValue N0 = N->getOperand(0);
11355   EVT VT = N->getValueType(0);
11356   unsigned NumBits = VT.getScalarSizeInBits();
11357   SDLoc DL(N);
11358 
11359   // fold (ctpop c1) -> c2
11360   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11361     return C;
11362 
  // If the source is being shifted, but the shift doesn't affect any active
  // bits, then we can call CTPOP on the shift source directly.
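  // e.g. (ctpop (srl x, 4)) -> (ctpop x) if the low 4 bits of x are known
  // zero.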
11365   if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11366     if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11367       const APInt &Amt = AmtC->getAPIntValue();
11368       if (Amt.ult(NumBits)) {
11369         KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11370         if ((N0.getOpcode() == ISD::SRL &&
11371              Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11372             (N0.getOpcode() == ISD::SHL &&
11373              Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11374           return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11375         }
11376       }
11377     }
11378   }
11379 
  // If the upper bits are known to be zero, then see if it's profitable to
  // only count the lower bits.
11382   if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11383     EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11384     if (hasOperation(ISD::CTPOP, HalfVT) &&
11385         TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11386         TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11387       APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11388       if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11389         SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11390                                      DAG.getZExtOrTrunc(N0, DL, HalfVT));
11391         return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11392       }
11393     }
11394   }
11395 
11396   return SDValue();
11397 }
11398 
11399 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11400                                          SDValue RHS, const SDNodeFlags Flags,
11401                                          const TargetLowering &TLI) {
11402   EVT VT = LHS.getValueType();
11403   if (!VT.isFloatingPoint())
11404     return false;
11405 
11406   const TargetOptions &Options = DAG.getTarget().Options;
11407 
11408   return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11409          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11410          (Flags.hasNoNaNs() ||
11411           (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11412 }
11413 
11414 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11415                                        SDValue RHS, SDValue True, SDValue False,
11416                                        ISD::CondCode CC,
11417                                        const TargetLowering &TLI,
11418                                        SelectionDAG &DAG) {
11419   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11420   switch (CC) {
11421   case ISD::SETOLT:
11422   case ISD::SETOLE:
11423   case ISD::SETLT:
11424   case ISD::SETLE:
11425   case ISD::SETULT:
11426   case ISD::SETULE: {
    // Since it's known never nan to get here already, either fminnum or
    // fminnum_ieee are OK. Try the ieee version first, since fminnum is
    // expanded in terms of it.
11430     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11431     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11432       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11433 
11434     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11435     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11436       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11437     return SDValue();
11438   }
11439   case ISD::SETOGT:
11440   case ISD::SETOGE:
11441   case ISD::SETGT:
11442   case ISD::SETGE:
11443   case ISD::SETUGT:
11444   case ISD::SETUGE: {
11445     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11446     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11447       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11448 
11449     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11450     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11451       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11452     return SDValue();
11453   }
11454   default:
11455     return SDValue();
11456   }
11457 }
11458 
11459 SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
11460   const unsigned Opcode = N->getOpcode();
11461 
  // Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
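  // e.g. (srl (add nuw x, y), 1) -> (avgflooru x, y)
  //      (sra (add nsw x, y), 1) -> (avgfloors x, y)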
11463   if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11464     return SDValue();
11465 
11466   unsigned FloorISD = 0;
11467   auto VT = N->getValueType(0);
11468   bool IsUnsigned = false;
11469 
  // Decide whether signed or unsigned.
11471   switch (Opcode) {
11472   case ISD::SRA:
11473     if (!hasOperation(ISD::AVGFLOORS, VT))
11474       return SDValue();
11475     FloorISD = ISD::AVGFLOORS;
11476     break;
11477   case ISD::SRL:
11478     IsUnsigned = true;
11479     if (!hasOperation(ISD::AVGFLOORU, VT))
11480       return SDValue();
11481     FloorISD = ISD::AVGFLOORU;
11482     break;
11483   default:
11484     return SDValue();
11485   }
11486 
11487   // Captured values.
11488   SDValue A, B, Add;
11489 
11490   // Match floor average as it is common to both floor/ceil avgs.
11491   if (!sd_match(N, m_BinOp(Opcode,
11492                            m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11493                            m_One())))
11494     return SDValue();
11495 
11496   // Can't optimize adds that may wrap.
11497   if (IsUnsigned && !Add->getFlags().hasNoUnsignedWrap())
11498     return SDValue();
11499 
11500   if (!IsUnsigned && !Add->getFlags().hasNoSignedWrap())
11501     return SDValue();
11502 
11503   return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
11504 }
11505 
11506 /// Generate Min/Max node
11507 SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11508                                          SDValue RHS, SDValue True,
11509                                          SDValue False, ISD::CondCode CC) {
11510   if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11511     return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11512 
11513   // If we can't directly match this, try to see if we can pull an fneg out of
11514   // the select.
11515   SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
11516       True, DAG, LegalOperations, ForCodeSize);
11517   if (!NegTrue)
11518     return SDValue();
11519 
11520   HandleSDNode NegTrueHandle(NegTrue);
11521 
11522   // Try to unfold an fneg from the select if we are comparing the negated
11523   // constant.
11524   //
11525   // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11526   //
11527   // TODO: Handle fabs
11528   if (LHS == NegTrue) {
11529     // If we can't directly match this, try to see if we can pull an fneg out of
11530     // the select.
11531     SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
11532         RHS, DAG, LegalOperations, ForCodeSize);
11533     if (NegRHS) {
11534       HandleSDNode NegRHSHandle(NegRHS);
11535       if (NegRHS == False) {
11536         SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11537                                                    False, CC, TLI, DAG);
11538         if (Combined)
11539           return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11540       }
11541     }
11542   }
11543 
11544   return SDValue();
11545 }
11546 
11547 /// If a (v)select has a condition value that is a sign-bit test, try to smear
11548 /// the condition operand sign-bit across the value width and use it as a mask.
11549 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11550                                              SelectionDAG &DAG) {
11551   SDValue Cond = N->getOperand(0);
11552   SDValue C1 = N->getOperand(1);
11553   SDValue C2 = N->getOperand(2);
11554   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11555     return SDValue();
11556 
11557   EVT VT = N->getValueType(0);
11558   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11559       VT != Cond.getOperand(0).getValueType())
11560     return SDValue();
11561 
11562   // The inverted-condition + commuted-select variants of these patterns are
11563   // canonicalized to these forms in IR.
11564   SDValue X = Cond.getOperand(0);
11565   SDValue CondC = Cond.getOperand(1);
11566   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11567   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11568       isAllOnesOrAllOnesSplat(C2)) {
11569     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11570     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11571     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11572     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11573   }
11574   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11575     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11576     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11577     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11578     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11579   }
11580   return SDValue();
11581 }
11582 
11583 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11584                                                  const TargetLowering &TLI) {
11585   if (!TLI.convertSelectOfConstantsToMath(VT))
11586     return false;
11587 
11588   if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11589     return true;
11590   if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11591     return true;
11592 
11593   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11594   if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11595     return true;
11596   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11597     return true;
11598 
11599   return false;
11600 }
11601 
11602 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11603   SDValue Cond = N->getOperand(0);
11604   SDValue N1 = N->getOperand(1);
11605   SDValue N2 = N->getOperand(2);
11606   EVT VT = N->getValueType(0);
11607   EVT CondVT = Cond.getValueType();
11608   SDLoc DL(N);
11609 
11610   if (!VT.isInteger())
11611     return SDValue();
11612 
11613   auto *C1 = dyn_cast<ConstantSDNode>(N1);
11614   auto *C2 = dyn_cast<ConstantSDNode>(N2);
11615   if (!C1 || !C2)
11616     return SDValue();
11617 
11618   if (CondVT != MVT::i1 || LegalOperations) {
11619     // fold (select Cond, 0, 1) -> (xor Cond, 1)
    // We can't do this reliably if integer-based booleans have different
    // contents from floating-point-based booleans. This is because we can't
    // tell whether we have an integer-based boolean or a floating-point-based
    // boolean unless we
11623     // can find the SETCC that produced it and inspect its operands. This is
11624     // fairly easy if C is the SETCC node, but it can potentially be
11625     // undiscoverable (or not reasonably discoverable). For example, it could be
11626     // in another basic block or it could require searching a complicated
11627     // expression.
11628     if (CondVT.isInteger() &&
11629         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11630             TargetLowering::ZeroOrOneBooleanContent &&
11631         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11632             TargetLowering::ZeroOrOneBooleanContent &&
11633         C1->isZero() && C2->isOne()) {
11634       SDValue NotCond =
11635           DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11636       if (VT.bitsEq(CondVT))
11637         return NotCond;
11638       return DAG.getZExtOrTrunc(NotCond, DL, VT);
11639     }
11640 
11641     return SDValue();
11642   }
11643 
11644   // Only do this before legalization to avoid conflicting with target-specific
11645   // transforms in the other direction (create a select from a zext/sext). There
11646   // is also a target-independent combine here in DAGCombiner in the other
11647   // direction for (select Cond, -1, 0) when the condition is not i1.
11648   assert(CondVT == MVT::i1 && !LegalOperations);
11649 
11650   // select Cond, 1, 0 --> zext (Cond)
11651   if (C1->isOne() && C2->isZero())
11652     return DAG.getZExtOrTrunc(Cond, DL, VT);
11653 
11654   // select Cond, -1, 0 --> sext (Cond)
11655   if (C1->isAllOnes() && C2->isZero())
11656     return DAG.getSExtOrTrunc(Cond, DL, VT);
11657 
11658   // select Cond, 0, 1 --> zext (!Cond)
11659   if (C1->isZero() && C2->isOne()) {
11660     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11661     NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11662     return NotCond;
11663   }
11664 
11665   // select Cond, 0, -1 --> sext (!Cond)
11666   if (C1->isZero() && C2->isAllOnes()) {
11667     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11668     NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11669     return NotCond;
11670   }
11671 
11672   // Use a target hook because some targets may prefer to transform in the
11673   // other direction.
11674   if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11675     return SDValue();
11676 
11677   // For any constants that differ by 1, we can transform the select into
11678   // an extend and add.
11679   const APInt &C1Val = C1->getAPIntValue();
11680   const APInt &C2Val = C2->getAPIntValue();
11681 
11682   // select Cond, C1, C1-1 --> add (zext Cond), C1-1
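  // e.g. (select Cond, 7, 6) -> (add (zext Cond), 6)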
11683   if (C1Val - 1 == C2Val) {
11684     Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11685     return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11686   }
11687 
11688   // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11689   if (C1Val + 1 == C2Val) {
11690     Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11691     return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11692   }
11693 
11694   // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11695   if (C1Val.isPowerOf2() && C2Val.isZero()) {
11696     Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11697     SDValue ShAmtC =
11698         DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11699     return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11700   }
11701 
11702   // select Cond, -1, C --> or (sext Cond), C
11703   if (C1->isAllOnes()) {
11704     Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11705     return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11706   }
11707 
11708   // select Cond, C, -1 --> or (sext (not Cond)), C
11709   if (C2->isAllOnes()) {
11710     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11711     NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11712     return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11713   }
11714 
11715   if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11716     return V;
11717 
11718   return SDValue();
11719 }
11720 
11721 template <class MatchContextClass>
11722 static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
11723                                      SelectionDAG &DAG) {
11724   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11725           N->getOpcode() == ISD::VP_SELECT) &&
11726          "Expected a (v)(vp.)select");
11727   SDValue Cond = N->getOperand(0);
11728   SDValue T = N->getOperand(1), F = N->getOperand(2);
11729   EVT VT = N->getValueType(0);
11730   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11731   MatchContextClass matcher(DAG, TLI, N);
11732 
11733   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11734     return SDValue();
11735 
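        // Note: the arm that the logic op will now always evaluate is frozen
        // first; unlike select, or/and would otherwise let poison/undef from
        // the operand the original select did not choose leak into the result.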
11736   // select Cond, Cond, F --> or Cond, freeze(F)
11737   // select Cond, 1, F    --> or Cond, freeze(F)
11738   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11739     return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
11740 
11741   // select Cond, T, Cond --> and Cond, freeze(T)
11742   // select Cond, T, 0    --> and Cond, freeze(T)
11743   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11744     return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
11745 
11746   // select Cond, T, 1 --> or (not Cond), freeze(T)
11747   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11748     SDValue NotCond =
11749         matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11750     return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
11751   }
11752 
11753   // select Cond, 0, F --> and (not Cond), freeze(F)
11754   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11755     SDValue NotCond =
11756         matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11757     return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
11758   }
11759 
11760   return SDValue();
11761 }
11762 
11763 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11764   SDValue N0 = N->getOperand(0);
11765   SDValue N1 = N->getOperand(1);
11766   SDValue N2 = N->getOperand(2);
11767   EVT VT = N->getValueType(0);
11768   unsigned EltSizeInBits = VT.getScalarSizeInBits();
11769 
11770   SDValue Cond0, Cond1;
11771   ISD::CondCode CC;
11772   if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
11773                                      m_CondCode(CC)))) ||
11774       VT != Cond0.getValueType())
11775     return SDValue();
11776 
11777   // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11778   // compare is inverted from that pattern ("Cond0 s> -1").
11779   if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11780     ; // This is the pattern we are looking for.
11781   else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11782     std::swap(N1, N2);
11783   else
11784     return SDValue();
11785 
11786   // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
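        // e.g. for v4i32 this becomes (Cond0 s>> 31) & freeze(N1): the
        // arithmetic shift splats each lane's sign bit into an all-ones or
        // all-zeros mask.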
11787   if (isNullOrNullSplat(N2)) {
11788     SDLoc DL(N);
11789     SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11790     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11791     return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
11792   }
11793 
11794   // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
11795   if (isAllOnesOrAllOnesSplat(N1)) {
11796     SDLoc DL(N);
11797     SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11798     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11799     return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
11800   }
11801 
11802   // If we have to invert the sign bit mask, only do that transform if the
11803   // target has a bitwise 'and not' instruction (the invert is free).
11804   // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
11805   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11806   if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11807     SDLoc DL(N);
11808     SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11809     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11810     SDValue Not = DAG.getNOT(DL, Sra, VT);
11811     return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
11812   }
11813 
11814   // TODO: There's another pattern in this family, but it may require
11815   //       implementing hasOrNot() to check for profitability:
11816   //       (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
11817 
11818   return SDValue();
11819 }
11820 
11821 // Match SELECTs with absolute difference patterns.
11822 // (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
11823 // (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
11824 // (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
11825 // (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
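      // The reversed-arm variants (e.g. True = (sub b, a) with set?gt) fold to
      // the negation of the abd node, and are only formed when the target
      // supports ABD, since they cost an extra negation (see the hasOperation
      // checks below).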
11826 SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
11827                                      SDValue False, ISD::CondCode CC,
11828                                      const SDLoc &DL) {
11829   bool IsSigned = isSignedIntSetCC(CC);
11830   unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
11831   EVT VT = LHS.getValueType();
11832 
11833   if (LegalOperations && !hasOperation(ABDOpc, VT))
11834     return SDValue();
11835 
11836   switch (CC) {
11837   case ISD::SETGT:
11838   case ISD::SETGE:
11839   case ISD::SETUGT:
11840   case ISD::SETUGE:
11841     if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11842         sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
11843       return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11844     if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11845         sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11846         hasOperation(ABDOpc, VT))
11847       return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
11848     break;
11849   case ISD::SETLT:
11850   case ISD::SETLE:
11851   case ISD::SETULT:
11852   case ISD::SETULE:
11853     if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11854         sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
11855       return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11856     if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11857         sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11858         hasOperation(ABDOpc, VT))
11859       return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
11860     break;
11861   default:
11862     break;
11863   }
11864 
11865   return SDValue();
11866 }
11867 
11868 SDValue DAGCombiner::visitSELECT(SDNode *N) {
11869   SDValue N0 = N->getOperand(0);
11870   SDValue N1 = N->getOperand(1);
11871   SDValue N2 = N->getOperand(2);
11872   EVT VT = N->getValueType(0);
11873   EVT VT0 = N0.getValueType();
11874   SDLoc DL(N);
11875   SDNodeFlags Flags = N->getFlags();
11876 
11877   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11878     return V;
11879 
11880   if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
11881     return V;
11882 
11883   // select (not Cond), N1, N2 -> select Cond, N2, N1
11884   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11885     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11886     SelectOp->setFlags(Flags);
11887     return SelectOp;
11888   }
11889 
11890   if (SDValue V = foldSelectOfConstants(N))
11891     return V;
11892 
11893   // If we can fold this based on the true/false value, do so.
11894   if (SimplifySelectOps(N, N1, N2))
11895     return SDValue(N, 0); // Don't revisit N.
11896 
11897   if (VT0 == MVT::i1) {
11898     // The code in this block deals with the following 2 equivalences:
11899     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11900     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11901     // The target can specify its preferred form with the
11902     // shouldNormalizeToSelectSequence() callback. However, we always transform
11903     // to the right-hand form if the inner select already exists in the DAG,
11904     // and we always transform to the left-hand form if we know that we can
11905     // further optimize the combination of the conditions.
11906     bool normalizeToSequence =
11907         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11908     // select (and Cond0, Cond1), X, Y
11909     //   -> select Cond0, (select Cond1, X, Y), Y
11910     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11911       SDValue Cond0 = N0->getOperand(0);
11912       SDValue Cond1 = N0->getOperand(1);
11913       SDValue InnerSelect =
11914           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11915       if (normalizeToSequence || !InnerSelect.use_empty())
11916         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11917                            InnerSelect, N2, Flags);
11918       // Cleanup on failure.
11919       if (InnerSelect.use_empty())
11920         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11921     }
11922     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11923     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11924       SDValue Cond0 = N0->getOperand(0);
11925       SDValue Cond1 = N0->getOperand(1);
11926       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11927                                         Cond1, N1, N2, Flags);
11928       if (normalizeToSequence || !InnerSelect.use_empty())
11929         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11930                            InnerSelect, Flags);
11931       // Cleanup on failure.
11932       if (InnerSelect.use_empty())
11933         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11934     }
11935 
11936     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11937     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11938       SDValue N1_0 = N1->getOperand(0);
11939       SDValue N1_1 = N1->getOperand(1);
11940       SDValue N1_2 = N1->getOperand(2);
11941       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11942         // Create the actual and node if we can generate good code for it.
11943         if (!normalizeToSequence) {
11944           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11945           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11946                              N2, Flags);
11947         }
11948         // Otherwise see if we can optimize the "and" to a better pattern.
11949         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11950           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11951                              N2, Flags);
11952         }
11953       }
11954     }
11955     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11956     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11957       SDValue N2_0 = N2->getOperand(0);
11958       SDValue N2_1 = N2->getOperand(1);
11959       SDValue N2_2 = N2->getOperand(2);
11960       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11961         // Create the actual or node if we can generate good code for it.
11962         if (!normalizeToSequence) {
11963           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11964           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11965                              N2_2, Flags);
11966         }
11967         // Otherwise see if we can optimize to a better pattern.
11968         if (SDValue Combined = visitORLike(N0, N2_0, DL))
11969           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11970                              N2_2, Flags);
11971       }
11972     }
11973 
11974     // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
11975     if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11976         N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
11977         N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11978         N2.getOperand(1) == N1.getOperand(0) &&
11979         (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11980       return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
11981 
11982     // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
11983     if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11984         N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
11985         N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11986         N2.getOperand(1) == N1.getOperand(0) &&
11987         (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11988       return DAG.getNegative(
11989           DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
11990           DL, VT);
11991   }
11992 
11993   // Fold selects based on a setcc into other things, such as min/max/abs.
11994   if (N0.getOpcode() == ISD::SETCC) {
11995     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11996     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11997 
11998     // select (fcmp lt x, y), x, y -> fminnum x, y
11999     // select (fcmp gt x, y), x, y -> fmaxnum x, y
12000     //
12001     // This is OK if we don't care what happens if either operand is a NaN.
12002     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12003       if (SDValue FMinMax =
12004               combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12005         return FMinMax;
12006 
12007     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12008     // This is conservatively limited to pre-legal-operations to give targets
12009     // a chance to reverse the transform if they want to do that. Also, it is
12010     // unlikely that the pattern would be formed late, so it's probably not
12011     // worth going through the other checks.
12012     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12013         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12014         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12015       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12016       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12017       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12018         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12019         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12020         //
12021         // The IR equivalent of this transform would have this form:
12022         //   %a = add %x, C
12023         //   %c = icmp ugt %x, ~C
12024         //   %r = select %c, -1, %a
12025         //   =>
12026         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12027         //   %u0 = extractvalue %u, 0
12028         //   %u1 = extractvalue %u, 1
12029         //   %r = select %u1, -1, %u0
12030         SDVTList VTs = DAG.getVTList(VT, VT0);
12031         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12032         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12033       }
12034     }
12035 
12036     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12037         (!LegalOperations &&
12038          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12039       // Any flags available in a select/setcc fold will be on the setcc as they
12040       // migrated from fcmp.
12041       Flags = N0->getFlags();
12042       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
12043                                        N2, N0.getOperand(2));
12044       SelectNode->setFlags(Flags);
12045       return SelectNode;
12046     }
12047 
12048     if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12049       return ABD;
12050 
12051     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12052       return NewSel;
12053   }
12054 
12055   if (!VT.isVector())
12056     if (SDValue BinOp = foldSelectOfBinops(N))
12057       return BinOp;
12058 
12059   if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12060     return R;
12061 
12062   return SDValue();
12063 }
12064 
12065 // This function assumes all the vselect's arguments are CONCAT_VECTOR
12066 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
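      // For illustration: with Cond = <1,1,0,0>, LHS = concat(A,B) and
      // RHS = concat(C,D), the bottom half selects from LHS and the top half
      // from RHS, yielding concat(A,D).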
12067 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12068   SDLoc DL(N);
12069   SDValue Cond = N->getOperand(0);
12070   SDValue LHS = N->getOperand(1);
12071   SDValue RHS = N->getOperand(2);
12072   EVT VT = N->getValueType(0);
12073   int NumElems = VT.getVectorNumElements();
12074   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12075          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12076          Cond.getOpcode() == ISD::BUILD_VECTOR);
12077 
12078   // CONCAT_VECTORS can take an arbitrary number of operands. We only care
12079   // about binary ones here.
12080   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12081     return SDValue();
12082 
12083   // We're sure we have an even number of elements due to the
12084   // concat_vectors we have as arguments to vselect.
12085   // Skip BV elements until we find one that's not an UNDEF.
12086   // After we find a non-UNDEF element, keep looping until we get to half the
12087   // length of the BV and check that all the non-undef nodes are the same.
12088   ConstantSDNode *BottomHalf = nullptr;
12089   for (int i = 0; i < NumElems / 2; ++i) {
12090     if (Cond->getOperand(i)->isUndef())
12091       continue;
12092 
12093     if (BottomHalf == nullptr)
12094       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12095     else if (Cond->getOperand(i).getNode() != BottomHalf)
12096       return SDValue();
12097   }
12098 
12099   // Do the same for the second half of the BuildVector
12100   ConstantSDNode *TopHalf = nullptr;
12101   for (int i = NumElems / 2; i < NumElems; ++i) {
12102     if (Cond->getOperand(i)->isUndef())
12103       continue;
12104 
12105     if (TopHalf == nullptr)
12106       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12107     else if (Cond->getOperand(i).getNode() != TopHalf)
12108       return SDValue();
12109   }
12110 
12111   assert(TopHalf && BottomHalf &&
12112          "One half of the selector was all UNDEFs and the other was all the "
12113          "same value. This should have been addressed before this function.");
12114   return DAG.getNode(
12115       ISD::CONCAT_VECTORS, DL, VT,
12116       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12117       TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12118 }
12119 
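      // Try to move a uniform (splat) addend of a gather/scatter Index into the
      // scalar BasePtr so that existing operands can be reused, e.g. BasePtr = 0
      // with Index = splat(%p) + V becomes BasePtr = %p with Index = V.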
12120 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12121                        SelectionDAG &DAG, const SDLoc &DL) {
12122 
12123   // Only perform the transformation when existing operands can be reused.
12124   if (IndexIsScaled)
12125     return false;
12126 
12127   if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12128     return false;
12129 
12130   EVT VT = BasePtr.getValueType();
12131 
12132   if (SDValue SplatVal = DAG.getSplatValue(Index);
12133       SplatVal && !isNullConstant(SplatVal) &&
12134       SplatVal.getValueType() == VT) {
12135     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12136     Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12137     return true;
12138   }
12139 
12140   if (Index.getOpcode() != ISD::ADD)
12141     return false;
12142 
12143   if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12144       SplatVal && SplatVal.getValueType() == VT) {
12145     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12146     Index = Index.getOperand(1);
12147     return true;
12148   }
12149   if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12150       SplatVal && SplatVal.getValueType() == VT) {
12151     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12152     Index = Index.getOperand(0);
12153     return true;
12154   }
12155   return false;
12156 }
12157 
12158 // Fold sext/zext of index into index type.
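      // e.g. an Index of zext(V) can always be treated as unsigned, and the
      // extend itself can be dropped when the target reports it as free to
      // remove.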
12159 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12160                      SelectionDAG &DAG) {
12161   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12162 
12163   // It's always safe to look through zero extends.
12164   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12165     if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12166       IndexType = ISD::UNSIGNED_SCALED;
12167       Index = Index.getOperand(0);
12168       return true;
12169     }
12170     if (ISD::isIndexTypeSigned(IndexType)) {
12171       IndexType = ISD::UNSIGNED_SCALED;
12172       return true;
12173     }
12174   }
12175 
12176   // It's only safe to look through sign extends when Index is signed.
12177   if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12178       ISD::isIndexTypeSigned(IndexType) &&
12179       TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12180     Index = Index.getOperand(0);
12181     return true;
12182   }
12183 
12184   return false;
12185 }
12186 
12187 SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12188   VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12189   SDValue Mask = MSC->getMask();
12190   SDValue Chain = MSC->getChain();
12191   SDValue Index = MSC->getIndex();
12192   SDValue Scale = MSC->getScale();
12193   SDValue StoreVal = MSC->getValue();
12194   SDValue BasePtr = MSC->getBasePtr();
12195   SDValue VL = MSC->getVectorLength();
12196   ISD::MemIndexType IndexType = MSC->getIndexType();
12197   SDLoc DL(N);
12198 
12199   // Zap scatters with a zero mask.
12200   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12201     return Chain;
12202 
12203   if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12204     SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12205     return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12206                             DL, Ops, MSC->getMemOperand(), IndexType);
12207   }
12208 
12209   if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12210     SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12211     return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12212                             DL, Ops, MSC->getMemOperand(), IndexType);
12213   }
12214 
12215   return SDValue();
12216 }
12217 
12218 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12219   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12220   SDValue Mask = MSC->getMask();
12221   SDValue Chain = MSC->getChain();
12222   SDValue Index = MSC->getIndex();
12223   SDValue Scale = MSC->getScale();
12224   SDValue StoreVal = MSC->getValue();
12225   SDValue BasePtr = MSC->getBasePtr();
12226   ISD::MemIndexType IndexType = MSC->getIndexType();
12227   SDLoc DL(N);
12228 
12229   // Zap scatters with a zero mask.
12230   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12231     return Chain;
12232 
12233   if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12234     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12235     return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12236                                 DL, Ops, MSC->getMemOperand(), IndexType,
12237                                 MSC->isTruncatingStore());
12238   }
12239 
12240   if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12241     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12242     return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12243                                 DL, Ops, MSC->getMemOperand(), IndexType,
12244                                 MSC->isTruncatingStore());
12245   }
12246 
12247   return SDValue();
12248 }
12249 
12250 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12251   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12252   SDValue Mask = MST->getMask();
12253   SDValue Chain = MST->getChain();
12254   SDValue Value = MST->getValue();
12255   SDValue Ptr = MST->getBasePtr();
12256   SDLoc DL(N);
12257 
12258   // Zap masked stores with a zero mask.
12259   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12260     return Chain;
12261 
12262   // Remove a masked store if base pointers and masks are equal.
12263   if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12264     if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12265         MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12266         !MST->getBasePtr().isUndef() &&
12267         ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12268                                          MST1->getMemoryVT().getStoreSize()) ||
12269          ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12270         TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12271                             MST->getMemoryVT().getStoreSize())) {
12272       CombineTo(MST1, MST1->getChain());
12273       if (N->getOpcode() != ISD::DELETED_NODE)
12274         AddToWorklist(N);
12275       return SDValue(N, 0);
12276     }
12277   }
12278 
12279   // If this is a masked store with an all ones mask, we can use an unmasked store.
12280   // FIXME: Can we do this for indexed, compressing, or truncating stores?
12281   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12282       !MST->isCompressingStore() && !MST->isTruncatingStore())
12283     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12284                         MST->getBasePtr(), MST->getPointerInfo(),
12285                         MST->getOriginalAlign(),
12286                         MST->getMemOperand()->getFlags(), MST->getAAInfo());
12287 
12288   // Try transforming N to an indexed store.
12289   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12290     return SDValue(N, 0);
12291 
12292   if (MST->isTruncatingStore() && MST->isUnindexed() &&
12293       Value.getValueType().isInteger() &&
12294       (!isa<ConstantSDNode>(Value) ||
12295        !cast<ConstantSDNode>(Value)->isOpaque())) {
12296     APInt TruncDemandedBits =
12297         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12298                              MST->getMemoryVT().getScalarSizeInBits());
12299 
12300     // See if we can simplify the operation with
12301     // SimplifyDemandedBits, which only works if the value has a single use.
12302     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12303       // Re-visit the store if anything changed and the store hasn't been merged
12304       // with another node (N is deleted). SimplifyDemandedBits will add Value's
12305       // node back to the worklist if necessary, but we also need to re-visit
12306       // the Store node itself.
12307       if (N->getOpcode() != ISD::DELETED_NODE)
12308         AddToWorklist(N);
12309       return SDValue(N, 0);
12310     }
12311   }
12312 
12313   // If this is a TRUNC followed by a masked store, fold this into a masked
12314   // truncating store.  We can do this even if this is already a masked
12315   // truncstore.
12316   // TODO: Try combining to a masked compress store if possible.
12317   if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12318       MST->isUnindexed() && !MST->isCompressingStore() &&
12319       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12320                                MST->getMemoryVT(), LegalOperations)) {
12321     auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12322                                          Value.getOperand(0).getValueType());
12323     return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12324                               MST->getOffset(), Mask, MST->getMemoryVT(),
12325                               MST->getMemOperand(), MST->getAddressingMode(),
12326                               /*IsTruncating=*/true);
12327   }
12328 
12329   return SDValue();
12330 }
12331 
12332 SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12333   auto *SST = cast<VPStridedStoreSDNode>(N);
12334   EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12335   // Combine strided stores with unit-stride to a regular VP store.
12336   if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12337       CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12338     return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12339                           SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12340                           SST->getVectorLength(), SST->getMemoryVT(),
12341                           SST->getMemOperand(), SST->getAddressingMode(),
12342                           SST->isTruncatingStore(), SST->isCompressingStore());
12343   }
12344   return SDValue();
12345 }
12346 
12347 SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12348   SDLoc DL(N);
12349   SDValue Vec = N->getOperand(0);
12350   SDValue Mask = N->getOperand(1);
12351   SDValue Passthru = N->getOperand(2);
12352   EVT VecVT = Vec.getValueType();
12353 
12354   bool HasPassthru = !Passthru.isUndef();
12355 
12356   APInt SplatVal;
12357   if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12358     return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12359 
12360   if (Vec.isUndef() || Mask.isUndef())
12361     return Passthru;
12362 
12363   // No need for potentially expensive compress if the mask is constant.
12364   if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12365     SmallVector<SDValue, 16> Ops;
12366     EVT ScalarVT = VecVT.getVectorElementType();
12367     unsigned NumSelected = 0;
12368     unsigned NumElmts = VecVT.getVectorNumElements();
12369     for (unsigned I = 0; I < NumElmts; ++I) {
12370       SDValue MaskI = Mask.getOperand(I);
12371       // We treat undef mask entries as "false".
12372       if (MaskI.isUndef())
12373         continue;
12374 
12375       if (TLI.isConstTrueVal(MaskI)) {
12376         SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12377                                    DAG.getVectorIdxConstant(I, DL));
12378         Ops.push_back(VecI);
12379         NumSelected++;
12380       }
12381     }
12382     for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12383       SDValue Val =
12384           HasPassthru
12385               ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12386                             DAG.getVectorIdxConstant(Rest, DL))
12387               : DAG.getUNDEF(ScalarVT);
12388       Ops.push_back(Val);
12389     }
12390     return DAG.getBuildVector(VecVT, DL, Ops);
12391   }
12392 
12393   return SDValue();
12394 }
12395 
12396 SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12397   VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12398   SDValue Mask = MGT->getMask();
12399   SDValue Chain = MGT->getChain();
12400   SDValue Index = MGT->getIndex();
12401   SDValue Scale = MGT->getScale();
12402   SDValue BasePtr = MGT->getBasePtr();
12403   SDValue VL = MGT->getVectorLength();
12404   ISD::MemIndexType IndexType = MGT->getIndexType();
12405   SDLoc DL(N);
12406 
12407   if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12408     SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12409     return DAG.getGatherVP(
12410         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12411         Ops, MGT->getMemOperand(), IndexType);
12412   }
12413 
12414   if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12415     SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12416     return DAG.getGatherVP(
12417         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12418         Ops, MGT->getMemOperand(), IndexType);
12419   }
12420 
12421   return SDValue();
12422 }
12423 
12424 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12425   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12426   SDValue Mask = MGT->getMask();
12427   SDValue Chain = MGT->getChain();
12428   SDValue Index = MGT->getIndex();
12429   SDValue Scale = MGT->getScale();
12430   SDValue PassThru = MGT->getPassThru();
12431   SDValue BasePtr = MGT->getBasePtr();
12432   ISD::MemIndexType IndexType = MGT->getIndexType();
12433   SDLoc DL(N);
12434 
12435   // Zap gathers with a zero mask.
12436   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12437     return CombineTo(N, PassThru, MGT->getChain());
12438 
12439   if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12440     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12441     return DAG.getMaskedGather(
12442         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12443         Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12444   }
12445 
12446   if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12447     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12448     return DAG.getMaskedGather(
12449         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12450         Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12451   }
12452 
12453   return SDValue();
12454 }
12455 
12456 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12457   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12458   SDValue Mask = MLD->getMask();
12459   SDLoc DL(N);
12460 
12461   // Zap masked loads with a zero mask.
12462   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12463     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12464 
12465   // If this is a masked load with an all ones mask, we can use an unmasked load.
12466   // FIXME: Can we do this for indexed, expanding, or extending loads?
12467   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12468       !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12469     SDValue NewLd = DAG.getLoad(
12470         N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12471         MLD->getPointerInfo(), MLD->getOriginalAlign(),
12472         MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12473     return CombineTo(N, NewLd, NewLd.getValue(1));
12474   }
12475 
12476   // Try transforming N to an indexed load.
12477   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12478     return SDValue(N, 0);
12479 
12480   return SDValue();
12481 }
12482 
12483 SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12484   MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12485   SDValue Chain = HG->getChain();
12486   SDValue Inc = HG->getInc();
12487   SDValue Mask = HG->getMask();
12488   SDValue BasePtr = HG->getBasePtr();
12489   SDValue Index = HG->getIndex();
12490   SDLoc DL(HG);
12491 
12492   EVT MemVT = HG->getMemoryVT();
12493   MachineMemOperand *MMO = HG->getMemOperand();
12494   ISD::MemIndexType IndexType = HG->getIndexType();
12495 
12496   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12497     return Chain;
12498 
12499   SDValue Ops[] = {Chain,          Inc,           Mask, BasePtr, Index,
12500                    HG->getScale(), HG->getIntID()};
12501   if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
12502     return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12503                                   MMO, IndexType);
12504 
12505   EVT DataVT = Index.getValueType();
12506   if (refineIndexType(Index, IndexType, DataVT, DAG))
12507     return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12508                                   MMO, IndexType);
12509   return SDValue();
12510 }
12511 
12512 SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12513   auto *SLD = cast<VPStridedLoadSDNode>(N);
12514   EVT EltVT = SLD->getValueType(0).getVectorElementType();
12515   // Combine strided loads with unit-stride to a regular VP load.
12516   if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12517       CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12518     SDValue NewLd = DAG.getLoadVP(
12519         SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12520         SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12521         SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12522         SLD->getMemOperand(), SLD->isExpandingLoad());
12523     return CombineTo(N, NewLd, NewLd.getValue(1));
12524   }
12525   return SDValue();
12526 }
12527 
12528 /// A vector select of 2 constant vectors can be simplified to math/logic to
12529 /// avoid a variable select instruction and possibly avoid constant loads.
12530 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12531   SDValue Cond = N->getOperand(0);
12532   SDValue N1 = N->getOperand(1);
12533   SDValue N2 = N->getOperand(2);
12534   EVT VT = N->getValueType(0);
12535   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12536       !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12537       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12538       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12539     return SDValue();
12540 
12541   // Check if we can use the condition value to increment/decrement a single
12542   // constant value. This simplifies a select to an add and removes a constant
12543   // load/materialization from the general case.
12544   bool AllAddOne = true;
12545   bool AllSubOne = true;
12546   unsigned Elts = VT.getVectorNumElements();
12547   for (unsigned i = 0; i != Elts; ++i) {
12548     SDValue N1Elt = N1.getOperand(i);
12549     SDValue N2Elt = N2.getOperand(i);
12550     if (N1Elt.isUndef() || N2Elt.isUndef())
12551       continue;
12552     if (N1Elt.getValueType() != N2Elt.getValueType()) {
12553       AllAddOne = false;
12554       AllSubOne = false;
12555       break;
12556     }
12557 
12558     const APInt &C1 = N1Elt->getAsAPIntVal();
12559     const APInt &C2 = N2Elt->getAsAPIntVal();
12560     if (C1 != C2 + 1)
12561       AllAddOne = false;
12562     if (C1 != C2 - 1)
12563       AllSubOne = false;
12564   }
12565 
12566   // Further simplifications for the extra-special cases where the constants are
12567   // all 0 or all -1 should be implemented as folds of these patterns.
12568   SDLoc DL(N);
12569   if (AllAddOne || AllSubOne) {
12570     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12571     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
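          // e.g. vselect <4 x i1> %c, <4 x i32> splat(3), <4 x i32> splat(2)
          //      --> add (zext %c to <4 x i32>), splat(2)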
12572     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12573     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12574     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12575   }
12576 
12577   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12578   APInt Pow2C;
12579   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12580       isNullOrNullSplat(N2)) {
12581     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12582     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12583     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12584   }
12585 
12586   if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12587     return V;
12588 
12589   // The general case for select-of-constants:
12590   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12591   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12592   // leave that to a machine-specific pass.
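        // (When Cond is true the sext is all-ones, so the xor yields
        // (C1^C2)^C2 == C1; when false the and is zero and the result is C2.)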
12593   return SDValue();
12594 }
12595 
12596 SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12597   SDValue N0 = N->getOperand(0);
12598   SDValue N1 = N->getOperand(1);
12599   SDValue N2 = N->getOperand(2);
12600   SDLoc DL(N);
12601 
12602   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12603     return V;
12604 
12605   if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
12606     return V;
12607 
12608   return SDValue();
12609 }
12610 
12611 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12612   SDValue N0 = N->getOperand(0);
12613   SDValue N1 = N->getOperand(1);
12614   SDValue N2 = N->getOperand(2);
12615   EVT VT = N->getValueType(0);
12616   SDLoc DL(N);
12617 
12618   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12619     return V;
12620 
12621   if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12622     return V;
12623 
12624   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12625   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12626     return DAG.getSelect(DL, VT, F, N2, N1);
12627 
12628   // select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
12629   if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12630       DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
12631       N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
12632       TLI.getBooleanContents(N0.getValueType()) ==
12633           TargetLowering::ZeroOrNegativeOneBooleanContent) {
12634     return DAG.getNode(
12635         ISD::ADD, DL, N1.getValueType(), N2,
12636         DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12637   }
12638 
12639   // Canonicalize integer abs.
12640   // vselect (setg[te] X,  0),  X, -X ->
12641   // vselect (setgt    X, -1),  X, -X ->
12642   // vselect (setl[te] X,  0), -X,  X ->
12643   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
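        // e.g. for i32 X = -5: Y = -1, the add gives -6, and the xor with -1
        // (bitwise not) gives 5; for non-negative X, Y = 0 and both ops are
        // no-ops.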
12644   if (N0.getOpcode() == ISD::SETCC) {
12645     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12646     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12647     bool isAbs = false;
12648     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12649 
12650     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12651          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12652         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12653       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12654     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12655              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12656       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12657 
12658     if (isAbs) {
12659       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12660         return DAG.getNode(ISD::ABS, DL, VT, LHS);
12661 
12662       SDValue Shift = DAG.getNode(
12663           ISD::SRA, DL, VT, LHS,
12664           DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
12665       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12666       AddToWorklist(Shift.getNode());
12667       AddToWorklist(Add.getNode());
12668       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12669     }
12670 
12671     // vselect x, y (fcmp lt x, y) -> fminnum x, y
12672     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12673     //
12674     // This is OK if we don't care about what happens if either operand is a
12675     // NaN.
12676     //
12677     if (N0.hasOneUse() &&
12678         isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12679       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12680         return FMinMax;
12681     }
12682 
12683     if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12684       return S;
12685     if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12686       return S;
12687 
12688     // If this select has a condition (setcc) with narrower operands than the
12689     // select, try to widen the compare to match the select width.
12690     // TODO: This should be extended to handle any constant.
12691     // TODO: This could be extended to handle non-loading patterns, but that
12692     //       requires thorough testing to avoid regressions.
12693     if (isNullOrNullSplat(RHS)) {
12694       EVT NarrowVT = LHS.getValueType();
12695       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
12696       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12697       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12698       unsigned WideWidth = WideVT.getScalarSizeInBits();
12699       bool IsSigned = isSignedIntSetCC(CC);
12700       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12701       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12702           SetCCWidth != 1 && SetCCWidth < WideWidth &&
12703           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12704           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12705         // Both compare operands can be widened for free. The LHS can use an
12706         // extended load, and the RHS is a constant:
12707         //   vselect (ext (setcc load(X), C)), N1, N2 -->
12708         //   vselect (setcc extload(X), C'), N1, N2
12709         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12710         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12711         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12712         EVT WideSetCCVT = getSetCCResultType(WideVT);
12713         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12714         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12715       }
12716     }
12717 
12718     if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
12719       return ABD;
12720 
12721     // Match VSELECTs into add with unsigned saturation.
12722     if (hasOperation(ISD::UADDSAT, VT)) {
12723       // Check if one of the arms of the VSELECT is a vector with all bits set.
12724       // If it's on the left side, invert the predicate to simplify logic below.
12725       SDValue Other;
12726       ISD::CondCode SatCC = CC;
12727       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12728         Other = N2;
12729         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12730       } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12731         Other = N1;
12732       }
12733 
12734       if (Other && Other.getOpcode() == ISD::ADD) {
12735         SDValue CondLHS = LHS, CondRHS = RHS;
12736         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12737 
12738         // Canonicalize condition operands.
12739         if (SatCC == ISD::SETUGE) {
12740           std::swap(CondLHS, CondRHS);
12741           SatCC = ISD::SETULE;
12742         }
12743 
12744         // We can test against either of the addition operands.
12745         // x <= x+y ? x+y : ~0 --> uaddsat x, y
12746         // x+y >= x ? x+y : ~0 --> uaddsat x, y
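              // (x <= x+y is false exactly when the unsigned add wrapped, in
              // which case the select already produces the saturated value ~0.)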
12747         if (SatCC == ISD::SETULE && Other == CondRHS &&
12748             (OpLHS == CondLHS || OpRHS == CondLHS))
12749           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12750 
12751         if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12752             (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12753              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12754             CondLHS == OpLHS) {
12755           // If the RHS is a constant we have to reverse the const
12756           // canonicalization.
12757           // x >= ~C ? x+C : ~0 --> uaddsat x, C
12758           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12759             return Cond->getAPIntValue() == ~Op->getAPIntValue();
12760           };
12761           if (SatCC == ISD::SETULE &&
12762               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12763             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12764         }
12765       }
12766     }
12767 
12768     // Match VSELECTs into sub with unsigned saturation.
12769     if (hasOperation(ISD::USUBSAT, VT)) {
12770       // Check if one of the arms of the VSELECT is a zero vector. If it's on
12771       // the left side, invert the predicate to simplify logic below.
12772       SDValue Other;
12773       ISD::CondCode SatCC = CC;
12774       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12775         Other = N2;
12776         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12777       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12778         Other = N1;
12779       }
12780 
12781       // zext(x) >= y ? trunc(zext(x) - y) : 0
12782       // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12783       // zext(x) >  y ? trunc(zext(x) - y) : 0
12784       // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
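            // e.g. with x: v4i8 zero-extended to v4i16, SatLimit is 255, so
            // this becomes usubsat(x, trunc(umin(y, 255))).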
12785       if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12786           Other.getOperand(0).getOpcode() == ISD::SUB &&
12787           (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12788         SDValue OpLHS = Other.getOperand(0).getOperand(0);
12789         SDValue OpRHS = Other.getOperand(0).getOperand(1);
12790         if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12791           if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12792                                               DAG, DL))
12793             return R;
12794       }
12795 
12796       if (Other && Other.getNumOperands() == 2) {
12797         SDValue CondRHS = RHS;
12798         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12799 
12800         if (OpLHS == LHS) {
12801           // Look for a general sub with unsigned saturation first.
12802           // x >= y ? x-y : 0 --> usubsat x, y
12803           // x >  y ? x-y : 0 --> usubsat x, y
12804           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12805               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12806             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12807 
12808           if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12809               OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12810             if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12811                 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12812               // If the RHS is a constant we have to reverse the const
12813               // canonicalization.
12814               // x > C-1 ? x+-C : 0 --> usubsat x, C
12815               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12816                 return (!Op && !Cond) ||
12817                        (Op && Cond &&
12818                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12819               };
12820               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12821                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12822                                             /*AllowUndefs*/ true)) {
12823                 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12824                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12825               }
12826 
12827               // Another special case: If C was a sign bit, the sub has been
12828               // canonicalized into a xor.
12829               // FIXME: Would it be better to use computeKnownBits to
12830               // determine whether it's safe to decanonicalize the xor?
12831               // x s< 0 ? x^C : 0 --> usubsat x, C
12832               APInt SplatValue;
12833               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12834                   ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12835                   ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12836                   SplatValue.isSignMask()) {
12837                 // Note that we have to rebuild the RHS constant here to
12838                 // ensure we don't rely on particular values of undef lanes.
12839                 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12840                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12841               }
12842             }
12843           }
12844         }
12845       }
12846     }
12847   }
12848 
12849   if (SimplifySelectOps(N, N1, N2))
12850     return SDValue(N, 0);  // Don't revisit N.
12851 
12852   // Fold (vselect all_ones, N1, N2) -> N1
12853   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12854     return N1;
12855   // Fold (vselect all_zeros, N1, N2) -> N2
12856   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12857     return N2;
12858 
12859   // The ConvertSelectToConcatVector function assumes both of the above
12860   // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12861   // and addressed.
12862   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12863       N2.getOpcode() == ISD::CONCAT_VECTORS &&
12864       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12865     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12866       return CV;
12867   }
12868 
12869   if (SDValue V = foldVSelectOfConstants(N))
12870     return V;
12871 
12872   if (hasOperation(ISD::SRA, VT))
12873     if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12874       return V;
12875 
12876   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12877     return SDValue(N, 0);
12878 
12879   return SDValue();
12880 }
12881 
12882 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12883   SDValue N0 = N->getOperand(0);
12884   SDValue N1 = N->getOperand(1);
12885   SDValue N2 = N->getOperand(2);
12886   SDValue N3 = N->getOperand(3);
12887   SDValue N4 = N->getOperand(4);
12888   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12889   SDLoc DL(N);
12890 
12891   // fold select_cc lhs, rhs, x, x, cc -> x
12892   if (N2 == N3)
12893     return N2;
12894 
12895   // select_cc bool, 0, x, y, seteq -> select bool, y, x
12896   if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12897       isNullConstant(N1))
12898     return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
12899 
12900   // Determine if the condition we're dealing with is constant
12901   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12902                                   CC, DL, false)) {
12903     AddToWorklist(SCC.getNode());
12904 
12905     // cond always true -> true val
12906     // cond always false -> false val
12907     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12908       return SCCC->isZero() ? N3 : N2;
12909 
12910     // When the condition is UNDEF, just return the first operand. This is
12911     // coherent with DAG creation; no setcc node is created in this case.
12912     if (SCC->isUndef())
12913       return N2;
12914 
12915     // Fold to a simpler select_cc
12916     if (SCC.getOpcode() == ISD::SETCC) {
12917       SDValue SelectOp =
12918           DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0),
12919                       SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12920       SelectOp->setFlags(SCC->getFlags());
12921       return SelectOp;
12922     }
12923   }
12924 
12925   // If we can fold this based on the true/false value, do so.
12926   if (SimplifySelectOps(N, N2, N3))
12927     return SDValue(N, 0); // Don't revisit N.
12928 
12929   // fold select_cc into other things, such as min/max/abs
12930   return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
12931 }
12932 
12933 SDValue DAGCombiner::visitSETCC(SDNode *N) {
12934   // setcc is very commonly used as an argument to brcond. This pattern
12935   // also lends itself to numerous combines and, as a result, it is desirable
12936   // to keep the argument to a brcond as a setcc as much as possible.
12937   bool PreferSetCC =
12938       N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
12939 
12940   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12941   EVT VT = N->getValueType(0);
12942   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12943   SDLoc DL(N);
12944 
12945   if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
12946     // If we prefer to have a setcc, and we don't, we'll try our best to
12947     // recreate one using rebuildSetCC.
12948     if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12949       SDValue NewSetCC = rebuildSetCC(Combined);
12950 
12951       // We don't have anything interesting to combine to.
12952       if (NewSetCC.getNode() == N)
12953         return SDValue();
12954 
12955       if (NewSetCC)
12956         return NewSetCC;
12957     }
12958     return Combined;
12959   }
12960 
12961   // Optimize
12962   //    1) (icmp eq/ne (and X, C0), (shift X, C1))
12963   // or
12964   //    2) (icmp eq/ne X, (rotate X, C1))
12965   // If C0 is a mask or shifted mask and the shift amount (C1) isolates the
12966   // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
12967   // then:
12968   // If C1 is a power of 2, the rotate and shift+and versions are
12969   // equivalent, so we can interchange them depending on target preference.
12970   // Otherwise, if we have the shift+and version, we can interchange srl/shl,
12971   // which in turn affects the constant C0. We can use this to get better
12972   // constants, again determined by target preference.
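  // For illustration (64-bit X, C1 == 32, a power of 2):
  //   (X & 0x00000000FFFFFFFF) == (X >> 32)
  //     <=> (X & 0xFFFFFFFF00000000) == (X << 32)   (srl/shl interchanged)
  //     <=> X == (rotr X, 32)                       (rotate form)
  // Each comparison holds exactly when the two 32-bit halves of X are equal.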
12973   if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12974     auto IsAndWithShift = [](SDValue A, SDValue B) {
12975       return A.getOpcode() == ISD::AND &&
12976              (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12977              A.getOperand(0) == B.getOperand(0);
12978     };
12979     auto IsRotateWithOp = [](SDValue A, SDValue B) {
12980       return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12981              B.getOperand(0) == A;
12982     };
12983     SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12984     bool IsRotate = false;
12985 
12986     // Find either shift+and or rotate pattern.
12987     if (IsAndWithShift(N0, N1)) {
12988       AndOrOp = N0;
12989       ShiftOrRotate = N1;
12990     } else if (IsAndWithShift(N1, N0)) {
12991       AndOrOp = N1;
12992       ShiftOrRotate = N0;
12993     } else if (IsRotateWithOp(N0, N1)) {
12994       IsRotate = true;
12995       AndOrOp = N0;
12996       ShiftOrRotate = N1;
12997     } else if (IsRotateWithOp(N1, N0)) {
12998       IsRotate = true;
12999       AndOrOp = N1;
13000       ShiftOrRotate = N0;
13001     }
13002 
13003     if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13004         (IsRotate || AndOrOp.hasOneUse())) {
13005       EVT OpVT = N0.getValueType();
13006       // Get the constant shift/rotate amount, and possibly the mask (if it's
13007       // the shift+and variant).
13008       auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13009         ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13010                                                     /*AllowTrunc*/ false);
13011         if (CNode == nullptr)
13012           return std::nullopt;
13013         return CNode->getAPIntValue();
13014       };
13015       std::optional<APInt> AndCMask =
13016           IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13017       std::optional<APInt> ShiftCAmt =
13018           GetAPIntValue(ShiftOrRotate.getOperand(1));
13019       unsigned NumBits = OpVT.getScalarSizeInBits();
13020 
13021       // We found constants.
13022       if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13023         unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13024         // Check that the constants meet the constraints.
13025         bool CanTransform = IsRotate;
13026         if (!CanTransform) {
13027           // Check that the mask and shift complement each other
13028           CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13029           // Check that we are comparing all bits
13030           CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13031           // Check that the and mask is correct for the shift
13032           CanTransform &=
13033               ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13034         }
13035 
13036         // See if target prefers another shift/rotate opcode.
13037         unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13038             OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13039         // Transform is valid and we have a new preference.
13040         if (CanTransform && NewShiftOpc != ShiftOpc) {
13041           SDValue NewShiftOrRotate =
13042               DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13043                           ShiftOrRotate.getOperand(1));
13044           SDValue NewAndOrOp = SDValue();
13045 
13046           if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13047             APInt NewMask =
13048                 NewShiftOpc == ISD::SHL
13049                     ? APInt::getHighBitsSet(NumBits,
13050                                             NumBits - ShiftCAmt->getZExtValue())
13051                     : APInt::getLowBitsSet(NumBits,
13052                                            NumBits - ShiftCAmt->getZExtValue());
13053             NewAndOrOp =
13054                 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13055                             DAG.getConstant(NewMask, DL, OpVT));
13056           } else {
13057             NewAndOrOp = ShiftOrRotate.getOperand(0);
13058           }
13059 
13060           return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13061         }
13062       }
13063     }
13064   }
13065   return SDValue();
13066 }
13067 
13068 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13069   SDValue LHS = N->getOperand(0);
13070   SDValue RHS = N->getOperand(1);
13071   SDValue Carry = N->getOperand(2);
13072   SDValue Cond = N->getOperand(3);
13073 
13074   // If Carry is false, fold to a regular SETCC.
13075   if (isNullConstant(Carry))
13076     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13077 
13078   return SDValue();
13079 }
13080 
13081 /// Check that N satisfies:
13082 ///   N is used once.
13083 ///   N is a load.
13084 ///   The load is compatible with ExtOpcode, meaning:
13085 ///     If the load has an explicit zero/sign extension, ExtOpcode must have
13086 ///     the same extension.
13087 ///     Otherwise, any ExtOpcode is compatible.
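/// For example, a sextload is compatible with ISD::SIGN_EXTEND but not with
/// ISD::ZERO_EXTEND, whereas non-extending and any-extending loads are
/// compatible with every extension opcode.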
13088 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13089   if (!N.hasOneUse())
13090     return false;
13091 
13092   if (!isa<LoadSDNode>(N))
13093     return false;
13094 
13095   LoadSDNode *Load = cast<LoadSDNode>(N);
13096   ISD::LoadExtType LoadExt = Load->getExtensionType();
13097   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13098     return true;
13099 
13100   // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13101   // extension.
13102   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13103       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13104     return false;
13105 
13106   return true;
13107 }
13108 
13109 /// Fold
13110 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13111 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13112 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13113 /// This function is called by the DAGCombiner when visiting sext/zext/aext
13114 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13115 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13116                                          SelectionDAG &DAG, const SDLoc &DL,
13117                                          CombineLevel Level) {
13118   unsigned Opcode = N->getOpcode();
13119   SDValue N0 = N->getOperand(0);
13120   EVT VT = N->getValueType(0);
13121   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13122           Opcode == ISD::ANY_EXTEND) &&
13123          "Expected EXTEND dag node in input!");
13124 
13125   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13126       !N0.hasOneUse())
13127     return SDValue();
13128 
13129   SDValue Op1 = N0->getOperand(1);
13130   SDValue Op2 = N0->getOperand(2);
13131   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13132     return SDValue();
13133 
13134   auto ExtLoadOpcode = ISD::EXTLOAD;
13135   if (Opcode == ISD::SIGN_EXTEND)
13136     ExtLoadOpcode = ISD::SEXTLOAD;
13137   else if (Opcode == ISD::ZERO_EXTEND)
13138     ExtLoadOpcode = ISD::ZEXTLOAD;
13139 
13140   // An illegal VSELECT may fail in ISel if it is created after legalization
13141   // (DAG Combine2), so we should conservatively check the OperationAction.
13142   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13143   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13144   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13145       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13146       (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13147        TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13148     return SDValue();
13149 
13150   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13151   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13152   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13153 }
13154 
13155 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13156 /// a build_vector of constants.
13157 /// This function is called by the DAGCombiner when visiting sext/zext/aext
13158 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13159 /// Vector extends are not folded if operations are legal; this is to
13160 /// avoid introducing illegal build_vector dag nodes.
13161 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13162                                          const TargetLowering &TLI,
13163                                          SelectionDAG &DAG, bool LegalTypes) {
13164   unsigned Opcode = N->getOpcode();
13165   SDValue N0 = N->getOperand(0);
13166   EVT VT = N->getValueType(0);
13167 
13168   assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13169          "Expected EXTEND dag node in input!");
13170 
13171   // fold (sext c1) -> c1
13172   // fold (zext c1) -> c1
13173   // fold (aext c1) -> c1
13174   if (isa<ConstantSDNode>(N0))
13175     return DAG.getNode(Opcode, DL, VT, N0);
13176 
13177   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13178   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13179   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13180   if (N0->getOpcode() == ISD::SELECT) {
13181     SDValue Op1 = N0->getOperand(1);
13182     SDValue Op2 = N0->getOperand(2);
13183     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13184         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13185       // For any_extend, choose sign extension of the constants to allow a
13186       // possible further transform to sign_extend_inreg, i.e.:
13187       //
13188       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13189       // t2: i64 = any_extend t1
13190       // -->
13191       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13192       // -->
13193       // t4: i64 = sign_extend_inreg t3
13194       unsigned FoldOpc = Opcode;
13195       if (FoldOpc == ISD::ANY_EXTEND)
13196         FoldOpc = ISD::SIGN_EXTEND;
13197       return DAG.getSelect(DL, VT, N0->getOperand(0),
13198                            DAG.getNode(FoldOpc, DL, VT, Op1),
13199                            DAG.getNode(FoldOpc, DL, VT, Op2));
13200     }
13201   }
13202 
13203   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
13204   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
13205   // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
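  // For illustration: (zext (v4i8 build_vector <1, 2, undef, 3>)) becomes
  // (v4i32 build_vector <1, 2, 0, 3>); an undef lane folds to 0 for
  // sign/zero extension and stays undef only for any_extend (see the loop
  // below).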
13206   EVT SVT = VT.getScalarType();
13207   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13208       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13209     return SDValue();
13210 
13211   // We can fold this node into a build_vector.
13212   unsigned VTBits = SVT.getSizeInBits();
13213   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13214   SmallVector<SDValue, 8> Elts;
13215   unsigned NumElts = VT.getVectorNumElements();
13216 
13217   for (unsigned i = 0; i != NumElts; ++i) {
13218     SDValue Op = N0.getOperand(i);
13219     if (Op.isUndef()) {
13220       if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13221         Elts.push_back(DAG.getUNDEF(SVT));
13222       else
13223         Elts.push_back(DAG.getConstant(0, DL, SVT));
13224       continue;
13225     }
13226 
13227     SDLoc DL(Op);
13228     // Get the constant value and, if needed, truncate it to the element size;
13229     // nodes like build_vector might have constants wider than the scalar type.
13230     APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13231     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13232       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13233     else
13234       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13235   }
13236 
13237   return DAG.getBuildVector(VT, DL, Elts);
13238 }
13239 
13240 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
13241 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13242 // transformation. Returns true if the extensions are possible and the
13243 // above-mentioned transformation is profitable.
13244 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13245                                     unsigned ExtOpc,
13246                                     SmallVectorImpl<SDNode *> &ExtendNodes,
13247                                     const TargetLowering &TLI) {
13248   bool HasCopyToRegUses = false;
13249   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13250   for (SDUse &Use : N0->uses()) {
13251     SDNode *User = Use.getUser();
13252     if (User == N)
13253       continue;
13254     if (Use.getResNo() != N0.getResNo())
13255       continue;
13256     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13257     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13258       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13259       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13260         // Sign bits will be lost after a zext.
13261         return false;
13262       bool Add = false;
13263       for (unsigned i = 0; i != 2; ++i) {
13264         SDValue UseOp = User->getOperand(i);
13265         if (UseOp == N0)
13266           continue;
13267         if (!isa<ConstantSDNode>(UseOp))
13268           return false;
13269         Add = true;
13270       }
13271       if (Add)
13272         ExtendNodes.push_back(User);
13273       continue;
13274     }
13275     // If truncates aren't free and there are users we can't
13276     // extend, it isn't worthwhile.
13277     if (!isTruncFree)
13278       return false;
13279     // Remember if this value is live-out.
13280     if (User->getOpcode() == ISD::CopyToReg)
13281       HasCopyToRegUses = true;
13282   }
13283 
13284   if (HasCopyToRegUses) {
13285     bool BothLiveOut = false;
13286     for (SDUse &Use : N->uses()) {
13287       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13288         BothLiveOut = true;
13289         break;
13290       }
13291     }
13292     if (BothLiveOut)
13293       // Both unextended and extended values are live out. There had better be
13294       // a good reason for the transformation.
13295       return !ExtendNodes.empty();
13296   }
13297   return true;
13298 }
13299 
13300 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
13301                                   SDValue OrigLoad, SDValue ExtLoad,
13302                                   ISD::NodeType ExtType) {
13303   // Extend SetCC uses if necessary.
13304   SDLoc DL(ExtLoad);
13305   for (SDNode *SetCC : SetCCs) {
13306     SmallVector<SDValue, 4> Ops;
13307 
13308     for (unsigned j = 0; j != 2; ++j) {
13309       SDValue SOp = SetCC->getOperand(j);
13310       if (SOp == OrigLoad)
13311         Ops.push_back(ExtLoad);
13312       else
13313         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13314     }
13315 
13316     Ops.push_back(SetCC->getOperand(2));
13317     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13318   }
13319 }
13320 
13321 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13322 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13323   SDValue N0 = N->getOperand(0);
13324   EVT DstVT = N->getValueType(0);
13325   EVT SrcVT = N0.getValueType();
13326 
13327   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13328           N->getOpcode() == ISD::ZERO_EXTEND) &&
13329          "Unexpected node type (not an extend)!");
13330 
13331   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13332   // For example, on a target with legal v4i32, but illegal v8i32, turn:
13333   //   (v8i32 (sext (v8i16 (load x))))
13334   // into:
13335   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
13336   //                          (v4i32 (sextload (x + 16)))))
13337   // Where uses of the original load, i.e.:
13338   //   (v8i16 (load x))
13339   // are replaced with:
13340   //   (v8i16 (truncate
13341   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
13342   //                            (v4i32 (sextload (x + 16)))))))
13343   //
13344   // This combine is only applicable to illegal, but splittable, vectors.
13345   // All legal types, and illegal non-vector types, are handled elsewhere.
13346   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13347   //
13348   if (N0->getOpcode() != ISD::LOAD)
13349     return SDValue();
13350 
13351   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13352 
13353   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13354       !N0.hasOneUse() || !LN0->isSimple() ||
13355       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13356       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13357     return SDValue();
13358 
13359   SmallVector<SDNode *, 4> SetCCs;
13360   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13361     return SDValue();
13362 
13363   ISD::LoadExtType ExtType =
13364       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13365 
13366   // Try to split the vector types to get down to legal types.
13367   EVT SplitSrcVT = SrcVT;
13368   EVT SplitDstVT = DstVT;
13369   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13370          SplitSrcVT.getVectorNumElements() > 1) {
13371     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13372     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13373   }
13374 
13375   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13376     return SDValue();
13377 
13378   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13379 
13380   SDLoc DL(N);
13381   const unsigned NumSplits =
13382       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13383   const unsigned Stride = SplitSrcVT.getStoreSize();
13384   SmallVector<SDValue, 4> Loads;
13385   SmallVector<SDValue, 4> Chains;
13386 
13387   SDValue BasePtr = LN0->getBasePtr();
13388   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13389     const unsigned Offset = Idx * Stride;
13390 
13391     SDValue SplitLoad =
13392         DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13393                        BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13394                        SplitSrcVT, LN0->getOriginalAlign(),
13395                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13396 
13397     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13398 
13399     Loads.push_back(SplitLoad.getValue(0));
13400     Chains.push_back(SplitLoad.getValue(1));
13401   }
13402 
13403   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13404   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13405 
13406   // Simplify TF.
13407   AddToWorklist(NewChain.getNode());
13408 
13409   CombineTo(N, NewValue);
13410 
13411   // Replace uses of the original load (before extension)
13412   // with a truncate of the concatenated sextloaded vectors.
13413   SDValue Trunc =
13414       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13415   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13416   CombineTo(N0.getNode(), Trunc, NewChain);
13417   return SDValue(N, 0); // Return N so it doesn't get rechecked!
13418 }
13419 
13420 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13421 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
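// For illustration: (zext i32->i64 (and (srl (i32 load x), 4), 0xFF)) can
// become (and (srl (i64 zextload x), 4), 0xFF), where both constants have
// been zero-extended to the wider type.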
13422 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13423   assert(N->getOpcode() == ISD::ZERO_EXTEND);
13424   EVT VT = N->getValueType(0);
13425   EVT OrigVT = N->getOperand(0).getValueType();
13426   if (TLI.isZExtFree(OrigVT, VT))
13427     return SDValue();
13428 
13429   // and/or/xor
13430   SDValue N0 = N->getOperand(0);
13431   if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13432       N0.getOperand(1).getOpcode() != ISD::Constant ||
13433       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13434     return SDValue();
13435 
13436   // shl/shr
13437   SDValue N1 = N0->getOperand(0);
13438   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13439       N1.getOperand(1).getOpcode() != ISD::Constant ||
13440       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13441     return SDValue();
13442 
13443   // load
13444   if (!isa<LoadSDNode>(N1.getOperand(0)))
13445     return SDValue();
13446   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13447   EVT MemVT = Load->getMemoryVT();
13448   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13449       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13450     return SDValue();
13451 
13452 
13454   // will be wrong.
13455   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13456     return SDValue();
13457 
13458   if (!N0.hasOneUse() || !N1.hasOneUse())
13459     return SDValue();
13460 
13461   SmallVector<SDNode*, 4> SetCCs;
13462   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13463                                ISD::ZERO_EXTEND, SetCCs, TLI))
13464     return SDValue();
13465 
13466   // Actually do the transformation.
13467   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13468                                    Load->getChain(), Load->getBasePtr(),
13469                                    Load->getMemoryVT(), Load->getMemOperand());
13470 
13471   SDLoc DL1(N1);
13472   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13473                               N1.getOperand(1));
13474 
13475   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13476   SDLoc DL0(N0);
13477   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13478                             DAG.getConstant(Mask, DL0, VT));
13479 
13480   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13481   CombineTo(N, And);
13482   if (SDValue(Load, 0).hasOneUse()) {
13483     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13484   } else {
13485     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13486                                 Load->getValueType(0), ExtLoad);
13487     CombineTo(Load, Trunc, ExtLoad.getValue(1));
13488   }
13489 
13490   // N0 is dead at this point.
13491   recursivelyDeleteUnusedNodes(N0.getNode());
13492 
13493   return SDValue(N,0); // Return N so it doesn't get rechecked!
13494 }
13495 
13496 /// If we're narrowing or widening the result of a vector select and the final
13497 /// size is the same size as a setcc (compare) feeding the select, then try to
13498 /// apply the cast operation to the select's operands because matching vector
13499 /// sizes for a select condition and other operands should be more efficient.
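/// For illustration, assuming the setcc result type for v4i32 operands is
/// v4i32 (so the compare and the cast result have matching 128-bit sizes):
///   (v4i32 trunc (vselect (setcc v4i32 X, Y), v4i64 A, v4i64 B))
///     --> (vselect (setcc X, Y), (v4i32 trunc A), (v4i32 trunc B))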
13500 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13501   unsigned CastOpcode = Cast->getOpcode();
13502   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13503           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13504           CastOpcode == ISD::FP_ROUND) &&
13505          "Unexpected opcode for vector select narrowing/widening");
13506 
13507   // We only do this transform before legal ops because the pattern may be
13508   // obfuscated by target-specific operations after legalization. Do not create
13509   // an illegal select op, however, because that may be difficult to lower.
13510   EVT VT = Cast->getValueType(0);
13511   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13512     return SDValue();
13513 
13514   SDValue VSel = Cast->getOperand(0);
13515   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13516       VSel.getOperand(0).getOpcode() != ISD::SETCC)
13517     return SDValue();
13518 
13519   // Does the setcc have the same vector size as the casted select?
13520   SDValue SetCC = VSel.getOperand(0);
13521   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13522   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13523     return SDValue();
13524 
13525   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13526   SDValue A = VSel.getOperand(1);
13527   SDValue B = VSel.getOperand(2);
13528   SDValue CastA, CastB;
13529   SDLoc DL(Cast);
13530   if (CastOpcode == ISD::FP_ROUND) {
13531     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13532     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13533     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13534   } else {
13535     CastA = DAG.getNode(CastOpcode, DL, VT, A);
13536     CastB = DAG.getNode(CastOpcode, DL, VT, B);
13537   }
13538   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13539 }
13540 
13541 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13542 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
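// For illustration: (sext i32->i64 (i32 sextload i16 x)) becomes a single
// (i64 sextload i16 x), and the old load's value and chain uses are
// redirected to the wider load.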
13543 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13544                                      const TargetLowering &TLI, EVT VT,
13545                                      bool LegalOperations, SDNode *N,
13546                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
13547   SDNode *N0Node = N0.getNode();
13548   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13549                                                    : ISD::isZEXTLoad(N0Node);
13550   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13551       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13552     return SDValue();
13553 
13554   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13555   EVT MemVT = LN0->getMemoryVT();
13556   if ((LegalOperations || !LN0->isSimple() ||
13557        VT.isVector()) &&
13558       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13559     return SDValue();
13560 
13561   SDValue ExtLoad =
13562       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13563                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13564   Combiner.CombineTo(N, ExtLoad);
13565   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13566   if (LN0->use_empty())
13567     Combiner.recursivelyDeleteUnusedNodes(LN0);
13568   return SDValue(N, 0); // Return N so it doesn't get rechecked!
13569 }
13570 
13571 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13572 // Only generate vector extloads when 1) they're legal, and 2) they are
13573 // deemed desirable by the target. NonNegZExt can be set to true if a zero
13574 // extend has the nonneg flag to allow use of sextload if profitable.
13575 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13576                                   const TargetLowering &TLI, EVT VT,
13577                                   bool LegalOperations, SDNode *N, SDValue N0,
13578                                   ISD::LoadExtType ExtLoadType,
13579                                   ISD::NodeType ExtOpc,
13580                                   bool NonNegZExt = false) {
13581   if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13582     return {};
13583 
13584   // If this is zext nneg, see if it would make sense to treat it as a sext.
13585   if (NonNegZExt) {
13586     assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13587            "Unexpected load type or opcode");
13588     for (SDNode *User : N0->users()) {
13589       if (User->getOpcode() == ISD::SETCC) {
13590         ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13591         if (ISD::isSignedIntSetCC(CC)) {
13592           ExtLoadType = ISD::SEXTLOAD;
13593           ExtOpc = ISD::SIGN_EXTEND;
13594           break;
13595         }
13596       }
13597     }
13598   }
13599 
13600   // TODO: isFixedLengthVector() should be removed; any negative effects on
13601   // code generation should instead be handled by the target's implementation
13602   // of isVectorLoadExtDesirable().
13603   if ((LegalOperations || VT.isFixedLengthVector() ||
13604        !cast<LoadSDNode>(N0)->isSimple()) &&
13605       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13606     return {};
13607 
13608   bool DoXform = true;
13609   SmallVector<SDNode *, 4> SetCCs;
13610   if (!N0.hasOneUse())
13611     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13612   if (VT.isVector())
13613     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13614   if (!DoXform)
13615     return {};
13616 
13617   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13618   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13619                                    LN0->getBasePtr(), N0.getValueType(),
13620                                    LN0->getMemOperand());
13621   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13622   // If the load value is used only by N, replace it via CombineTo N.
13623   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13624   Combiner.CombineTo(N, ExtLoad);
13625   if (NoReplaceTrunc) {
13626     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13627     Combiner.recursivelyDeleteUnusedNodes(LN0);
13628   } else {
13629     SDValue Trunc =
13630         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13631     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13632   }
13633   return SDValue(N, 0); // Return N so it doesn't get rechecked!
13634 }
13635 
13636 static SDValue
13637 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13638                          bool LegalOperations, SDNode *N, SDValue N0,
13639                          ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13640   if (!N0.hasOneUse())
13641     return SDValue();
13642 
13643   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13644   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13645     return SDValue();
13646 
13647   if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13648       !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13649     return SDValue();
13650 
13651   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13652     return SDValue();
13653 
13654   SDLoc dl(Ld);
13655   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13656   SDValue NewLoad = DAG.getMaskedLoad(
13657       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13658       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13659       ExtLoadType, Ld->isExpandingLoad());
13660   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13661   return NewLoad;
13662 }
13663 
13664 // fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13665 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13666                                         const TargetLowering &TLI, EVT VT,
13667                                         SDValue N0,
13668                                         ISD::LoadExtType ExtLoadType) {
13669   auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13670   if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13671     return {};
13672   EVT MemoryVT = ALoad->getMemoryVT();
13673   if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13674     return {};
13675   // Can't fold into ALoad if it is already extending differently.
13676   ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13677   if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13678       (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13679     return {};
13680 
13681   EVT OrigVT = ALoad->getValueType(0);
13682   assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13683   auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13684       ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13685       ALoad->getBasePtr(), ALoad->getMemOperand()));
13686   NewALoad->setExtensionType(ExtLoadType);
13687   DAG.ReplaceAllUsesOfValueWith(
13688       SDValue(ALoad, 0),
13689       DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13690   // Update the chain uses.
13691   DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13692   return SDValue(NewALoad, 0);
13693 }
13694 
13695 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13696                                        bool LegalOperations) {
13697   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13698           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13699 
13700   SDValue SetCC = N->getOperand(0);
13701   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13702       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13703     return SDValue();
13704 
13705   SDValue X = SetCC.getOperand(0);
13706   SDValue Ones = SetCC.getOperand(1);
13707   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13708   EVT VT = N->getValueType(0);
13709   EVT XVT = X.getValueType();
13710   // setge X, C is canonicalized to setgt, so we do not need to match that
13711   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13712   // not require the 'not' op.
13713   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13714     // Invert and smear/shift the sign bit:
13715     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13716     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
13717     SDLoc DL(N);
13718     unsigned ShCt = VT.getSizeInBits() - 1;
13719     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13720     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13721       SDValue NotX = DAG.getNOT(DL, X, VT);
13722       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13723       auto ShiftOpcode =
13724         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13725       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13726     }
13727   }
13728   return SDValue();
13729 }
13730 
13731 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13732   SDValue N0 = N->getOperand(0);
13733   if (N0.getOpcode() != ISD::SETCC)
13734     return SDValue();
13735 
13736   SDValue N00 = N0.getOperand(0);
13737   SDValue N01 = N0.getOperand(1);
13738   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13739   EVT VT = N->getValueType(0);
13740   EVT N00VT = N00.getValueType();
13741   SDLoc DL(N);
13742 
13743   // Propagate fast-math-flags.
13744   SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13745 
13746   // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13747   // the same size as the compared operands. Try to optimize sext(setcc())
13748   // if this is the case.
13749   if (VT.isVector() && !LegalOperations &&
13750       TLI.getBooleanContents(N00VT) ==
13751           TargetLowering::ZeroOrNegativeOneBooleanContent) {
13752     EVT SVT = getSetCCResultType(N00VT);
13753 
13754     // If we already have the desired type, don't change it.
13755     if (SVT != N0.getValueType()) {
13756       // We know that the # elements of the result is the same as the
13757       // # elements of the compare (and the # elements of the compare result
13758       // for that matter).  Check to see that they are the same size.  If so,
13759       // we know that the element size of the sext'd result matches the
13760       // element size of the compare operands.
13761       if (VT.getSizeInBits() == SVT.getSizeInBits())
13762         return DAG.getSetCC(DL, VT, N00, N01, CC);
13763 
13764       // If the desired elements are smaller or larger than the source
13765       // elements, we can use a matching integer vector type and then
13766       // truncate/sign extend.
13767       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13768       if (SVT == MatchingVecType) {
13769         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13770         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13771       }
13772     }
13773 
13774     // Try to eliminate the sext of a setcc by zexting the compare operands.
13775     if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13776         !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
13777       bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13778       unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13779       unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13780 
13781       // We have an unsupported narrow vector compare op that would be legal
13782       // if extended to the destination type. See if the compare operands
13783       // can be freely extended to the destination type.
13784       auto IsFreeToExtend = [&](SDValue V) {
13785         if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13786           return true;
13787         // Match a simple, non-extended load that can be converted to a
13788         // legal {z/s}ext-load.
13789         // TODO: Allow widening of an existing {z/s}ext-load?
13790         if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13791               ISD::isUNINDEXEDLoad(V.getNode()) &&
13792               cast<LoadSDNode>(V)->isSimple() &&
13793               TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13794           return false;
13795 
13796         // Non-chain users of this value must either be the setcc in this
13797         // sequence or extends that can be folded into the new {z/s}ext-load.
13798         for (SDUse &Use : V->uses()) {
13799           // Skip uses of the chain and the setcc.
13800           SDNode *User = Use.getUser();
13801           if (Use.getResNo() != 0 || User == N0.getNode())
13802             continue;
13803           // Extra users must have exactly the same cast we are about to create.
13804           // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13805           //       is enhanced similarly.
13806           if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13807             return false;
13808         }
13809         return true;
13810       };
13811 
13812       if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13813         SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13814         SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13815         return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13816       }
13817     }
13818   }
13819 
13820   // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13821   // Here, T can be 1 or -1, depending on the type of the setcc and
13822   // getBooleanContents().
13823   unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13824 
13825   // To determine the "true" side of the select, we need to know the high bit
13826   // of the value returned by the setcc if it evaluates to true.
13827   // If the type of the setcc is i1, then the true case of the select is just
13828   // sext(i1 1), that is, -1.
13829   // If the type of the setcc is larger (say, i8) then the value of the high
13830   // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13831   // of the appropriate width.
13832   SDValue ExtTrueVal = (SetCCWidth == 1)
13833                            ? DAG.getAllOnesConstant(DL, VT)
13834                            : DAG.getBoolConstant(true, DL, VT, N00VT);
13835   SDValue Zero = DAG.getConstant(0, DL, VT);
13836   if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13837     return SCC;
13838 
13839   if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13840     EVT SetCCVT = getSetCCResultType(N00VT);
13841     // Don't do this transform for i1 because there's a select transform
13842     // that would reverse it.
13843     // TODO: We should not do this transform at all without a target hook
13844     // because a sext is likely cheaper than a select?
13845     if (SetCCVT.getScalarSizeInBits() != 1 &&
13846         (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13847       SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13848       return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13849     }
13850   }
13851 
13852   return SDValue();
13853 }
13854 
13855 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13856   SDValue N0 = N->getOperand(0);
13857   EVT VT = N->getValueType(0);
13858   SDLoc DL(N);
13859 
13860   if (VT.isVector())
13861     if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13862       return FoldedVOp;
13863 
13864   // sext(undef) = 0 because the top bits will all be the same.
13865   if (N0.isUndef())
13866     return DAG.getConstant(0, DL, VT);
13867 
13868   if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13869     return Res;
13870 
13871   // fold (sext (sext x)) -> (sext x)
13872   // fold (sext (aext x)) -> (sext x)
13873   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13874     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13875 
13876   // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13877   // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13878   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13879       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
13880     return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
13881                        N0.getOperand(0));
13882 
13883   // fold (sext (sext_inreg x)) -> (sext (trunc x))
13884   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13885     SDValue N00 = N0.getOperand(0);
13886     EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13887     if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13888         (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13889       SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13890       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13891     }
13892   }
13893 
13894   if (N0.getOpcode() == ISD::TRUNCATE) {
13895     // fold (sext (truncate (load x))) -> (sext (smaller load x))
13896     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13897     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13898       SDNode *oye = N0.getOperand(0).getNode();
13899       if (NarrowLoad.getNode() != N0.getNode()) {
13900         CombineTo(N0.getNode(), NarrowLoad);
13901         // CombineTo deleted the truncate, if needed, but not what's under it.
13902         AddToWorklist(oye);
13903       }
13904       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13905     }
13906 
13907     // See if the value being truncated is already sign extended.  If so, just
13908     // eliminate the trunc/sext pair.
13909     SDValue Op = N0.getOperand(0);
13910     unsigned OpBits   = Op.getScalarValueSizeInBits();
13911     unsigned MidBits  = N0.getScalarValueSizeInBits();
13912     unsigned DestBits = VT.getScalarSizeInBits();
13913 
13914     if (N0->getFlags().hasNoSignedWrap() ||
13915         DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
13916       if (OpBits == DestBits) {
13917         // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
13918         // bits, Op already has the value we need.
13919         return Op;
13920       }
13921 
13922       if (OpBits < DestBits) {
13923         // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
13924         // bits, just sext from i32.
13925         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13926       }
13927 
13928       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
13929       // bits, just truncate to i32.
13930       SDNodeFlags Flags;
13931       Flags.setNoSignedWrap(true);
13932       Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
13933       return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
13934     }
13935 
13936     // fold (sext (truncate x)) -> (sextinreg x).
13937     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13938                                                  N0.getValueType())) {
13939       if (OpBits < DestBits)
13940         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13941       else if (OpBits > DestBits)
13942         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13943       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13944                          DAG.getValueType(N0.getValueType()));
13945     }
13946   }
13947 
13948   // Try to simplify (sext (load x)).
13949   if (SDValue foldedExt =
13950           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13951                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13952     return foldedExt;
13953 
13954   if (SDValue foldedExt =
13955           tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13956                                    ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13957     return foldedExt;
13958 
13959   // fold (sext (load x)) to multiple smaller sextloads.
13960   // Only on illegal but splittable vectors.
13961   if (SDValue ExtLoad = CombineExtLoad(N))
13962     return ExtLoad;
13963 
13964   // Try to simplify (sext (sextload x)).
13965   if (SDValue foldedExt = tryToFoldExtOfExtload(
13966           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13967     return foldedExt;
13968 
13969   // Try to simplify (sext (atomic_load x)).
13970   if (SDValue foldedExt =
13971           tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13972     return foldedExt;
13973 
13974   // fold (sext (and/or/xor (load x), cst)) ->
13975   //      (and/or/xor (sextload x), (sext cst))
13976   if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13977       isa<LoadSDNode>(N0.getOperand(0)) &&
13978       N0.getOperand(1).getOpcode() == ISD::Constant &&
13979       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13980     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13981     EVT MemVT = LN00->getMemoryVT();
13982     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13983       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13984       SmallVector<SDNode*, 4> SetCCs;
13985       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13986                                              ISD::SIGN_EXTEND, SetCCs, TLI);
13987       if (DoXform) {
13988         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13989                                          LN00->getChain(), LN00->getBasePtr(),
13990                                          LN00->getMemoryVT(),
13991                                          LN00->getMemOperand());
13992         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13993         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13994                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
13995         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13996         bool NoReplaceTruncAnd = !N0.hasOneUse();
13997         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13998         CombineTo(N, And);
13999         // If N0 has multiple uses, change other uses as well.
14000         if (NoReplaceTruncAnd) {
14001           SDValue TruncAnd =
14002               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14003           CombineTo(N0.getNode(), TruncAnd);
14004         }
14005         if (NoReplaceTrunc) {
14006           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14007         } else {
14008           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14009                                       LN00->getValueType(0), ExtLoad);
14010           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14011         }
14012         return SDValue(N,0); // Return N so it doesn't get rechecked!
14013       }
14014     }
14015   }
14016 
14017   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14018     return V;
14019 
14020   if (SDValue V = foldSextSetcc(N))
14021     return V;
14022 
14023   // fold (sext x) -> (zext x) if the sign bit is known zero.
14024   if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14025       (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14026       DAG.SignBitIsZero(N0))
14027     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14028 
14029   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14030     return NewVSel;
14031 
14032   // Eliminate this sign extend by doing a negation in the destination type:
14033   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14034   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14035       isNullOrNullSplat(N0.getOperand(0)) &&
14036       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
14037       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
14038     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14039     return DAG.getNegative(Zext, DL, VT);
14040   }
14041   // Eliminate this sign extend by doing a decrement in the destination type:
14042   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14043   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14044       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
14045       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14046       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
14047     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14048     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14049   }
14050 
14051   // fold sext (not i1 X) -> add (zext i1 X), -1
14052   // TODO: This could be extended to handle bool vectors.
14053   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14054       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14055                             TLI.isOperationLegal(ISD::ADD, VT)))) {
14056     // If we can eliminate the 'not', the sext form should be better
14057     if (SDValue NewXor = visitXOR(N0.getNode())) {
14058       // Returning N0 is a form of in-visit replacement that may have
14059       // invalidated N0.
14060       if (NewXor.getNode() == N0.getNode()) {
14061         // Return SDValue here as the xor should have already been replaced in
14062         // this sext.
14063         return SDValue();
14064       }
14065 
14066       // Return a new sext with the new xor.
14067       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14068     }
14069 
14070     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14071     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14072   }
14073 
14074   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14075     return Res;
14076 
14077   return SDValue();
14078 }
14079 
14080 /// Given an extending node with a pop-count operand, if the target does not
14081 /// support a pop-count in the narrow source type but does support it in the
14082 /// destination type, widen the pop-count to the destination type.
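/// For example, on a target where i16 CTPOP is illegal but i32 CTPOP is
/// legal: (i32 zext (ctpop i16 X)) --> (ctpop (i32 zext X)).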
14083 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14084   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14085           Extend->getOpcode() == ISD::ANY_EXTEND) &&
14086          "Expected extend op");
14087 
14088   SDValue CtPop = Extend->getOperand(0);
14089   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14090     return SDValue();
14091 
14092   EVT VT = Extend->getValueType(0);
14093   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14094   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14095       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14096     return SDValue();
14097 
14098   // zext (ctpop X) --> ctpop (zext X)
14099   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14100   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14101 }
14102 
14103 // If we have (zext (abs X)) where X is a type that will be promoted by type
14104 // legalization, convert to (abs (sext X)). But don't extend past a legal type.
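// For illustration, assuming i16 is promoted to i32 by type legalization:
//   (i64 zext (abs i16 X)) --> (i64 zext (abs (i32 sext X)))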
14105 static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14106   assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14107 
14108   EVT VT = Extend->getValueType(0);
14109   if (VT.isVector())
14110     return SDValue();
14111 
14112   SDValue Abs = Extend->getOperand(0);
14113   if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14114     return SDValue();
14115 
14116   EVT AbsVT = Abs.getValueType();
14117   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14118   if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14119       TargetLowering::TypePromoteInteger)
14120     return SDValue();
14121 
14122   EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14123 
14124   SDValue SExt =
14125       DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14126   SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14127   return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14128 }
14129 
14130 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14131   SDValue N0 = N->getOperand(0);
14132   EVT VT = N->getValueType(0);
14133   SDLoc DL(N);
14134 
14135   if (VT.isVector())
14136     if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14137       return FoldedVOp;
14138 
14139   // zext(undef) = 0
14140   if (N0.isUndef())
14141     return DAG.getConstant(0, DL, VT);
14142 
14143   if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14144     return Res;
14145 
14146   // fold (zext (zext x)) -> (zext x)
14147   // fold (zext (aext x)) -> (zext x)
14148   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14149     SDNodeFlags Flags;
14150     if (N0.getOpcode() == ISD::ZERO_EXTEND)
14151       Flags.setNonNeg(N0->getFlags().hasNonNeg());
14152     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14153   }
14154 
14155   // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14156   // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14157   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14158       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
14159     return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14160 
14161   // fold (zext (truncate x)) -> (zext x) or
14162   //      (zext (truncate x)) -> (truncate x)
14163   // This is valid when the truncated bits of x are already zero.
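  // For illustration: (i32 zext (i16 trunc (i32 x))) --> x when the top 16
  // bits of x are known to be zero.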
14164   SDValue Op;
14165   KnownBits Known;
14166   if (isTruncateOf(DAG, N0, Op, Known)) {
14167     APInt TruncatedBits =
14168       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14169       APInt(Op.getScalarValueSizeInBits(), 0) :
14170       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14171                         N0.getScalarValueSizeInBits(),
14172                         std::min(Op.getScalarValueSizeInBits(),
14173                                  VT.getScalarSizeInBits()));
14174     if (TruncatedBits.isSubsetOf(Known.Zero)) {
14175       SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14176       DAG.salvageDebugInfo(*N0.getNode());
14177 
14178       return ZExtOrTrunc;
14179     }
14180   }
14181 
14182   // fold (zext (truncate x)) -> (and x, mask)
14183   if (N0.getOpcode() == ISD::TRUNCATE) {
14184     // fold (zext (truncate (load x))) -> (zext (smaller load x))
14185     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14186     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14187       SDNode *oye = N0.getOperand(0).getNode();
14188       if (NarrowLoad.getNode() != N0.getNode()) {
14189         CombineTo(N0.getNode(), NarrowLoad);
14190         // CombineTo deleted the truncate, if needed, but not what's under it.
14191         AddToWorklist(oye);
14192       }
14193       return SDValue(N, 0); // Return N so it doesn't get rechecked!
14194     }
14195 
14196     EVT SrcVT = N0.getOperand(0).getValueType();
14197     EVT MinVT = N0.getValueType();
14198 
14199     if (N->getFlags().hasNonNeg()) {
14200       SDValue Op = N0.getOperand(0);
14201       unsigned OpBits = SrcVT.getScalarSizeInBits();
14202       unsigned MidBits = MinVT.getScalarSizeInBits();
14203       unsigned DestBits = VT.getScalarSizeInBits();
14204 
14205       if (N0->getFlags().hasNoSignedWrap() ||
14206           DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14207         if (OpBits == DestBits) {
14208           // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
14209           // bits, Op is already the final result.
14210           return Op;
14211         }
14212 
14213         if (OpBits < DestBits) {
14214           // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
14215           // bits, just sext from i32.
14216           // FIXME: This can probably be ZERO_EXTEND nneg?
14217           return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14218         }
14219 
14220         // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
14221         // bits, just truncate to i32.
14222         SDNodeFlags Flags;
14223         Flags.setNoSignedWrap(true);
14224         Flags.setNoUnsignedWrap(true);
14225         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14226       }
14227     }
14228 
14229     // Try to mask before the extension to avoid having to generate a larger
14230     // mask, possibly over several sub-vectors.
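          // For example, for (v8i64 (zext (v8i8 (truncate (v8i32 x))))), masking
          // with (and x, 255) in v8i32 and then extending needs only a v8i32
          // mask, rather than a v8i64 mask split across several sub-vectors.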
14231     if (SrcVT.bitsLT(VT) && VT.isVector()) {
14232       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14233                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
14234         SDValue Op = N0.getOperand(0);
14235         Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14236         AddToWorklist(Op.getNode());
14237         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14238         // Transfer the debug info; the new node is equivalent to N0.
14239         DAG.transferDbgValues(N0, ZExtOrTrunc);
14240         return ZExtOrTrunc;
14241       }
14242     }
14243 
14244     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14245       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14246       AddToWorklist(Op.getNode());
14247       SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14248       // We may safely transfer the debug info describing the truncate node over
14249       // to the equivalent and operation.
14250       DAG.transferDbgValues(N0, And);
14251       return And;
14252     }
14253   }
14254 
14255   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14256   // if either of the casts is not free.
14257   if (N0.getOpcode() == ISD::AND &&
14258       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14259       N0.getOperand(1).getOpcode() == ISD::Constant &&
14260       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
14261        !TLI.isZExtFree(N0.getValueType(), VT))) {
14262     SDValue X = N0.getOperand(0).getOperand(0);
14263     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
14264     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14265     return DAG.getNode(ISD::AND, DL, VT,
14266                        X, DAG.getConstant(Mask, DL, VT));
14267   }
14268 
14269   // Try to simplify (zext (load x)).
14270   if (SDValue foldedExt = tryToFoldExtOfLoad(
14271           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
14272           ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14273     return foldedExt;
14274 
14275   if (SDValue foldedExt =
14276           tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14277                                    ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14278     return foldedExt;
14279 
14280   // fold (zext (load x)) to multiple smaller zextloads.
14281   // Only on illegal but splittable vectors.
14282   if (SDValue ExtLoad = CombineExtLoad(N))
14283     return ExtLoad;
14284 
14285   // Try to simplify (zext (atomic_load x)).
14286   if (SDValue foldedExt =
14287           tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
14288     return foldedExt;
14289 
14290   // fold (zext (and/or/xor (load x), cst)) ->
14291   //      (and/or/xor (zextload x), (zext cst))
14292   // Unless (and (load x) cst) will match as a zextload already and has
14293   // additional users, or the zext is already free.
14294   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
14295       isa<LoadSDNode>(N0.getOperand(0)) &&
14296       N0.getOperand(1).getOpcode() == ISD::Constant &&
14297       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14298     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14299     EVT MemVT = LN00->getMemoryVT();
14300     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
14301         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14302       bool DoXform = true;
14303       SmallVector<SDNode*, 4> SetCCs;
14304       if (!N0.hasOneUse()) {
14305         if (N0.getOpcode() == ISD::AND) {
14306           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14307           EVT LoadResultTy = AndC->getValueType(0);
14308           EVT ExtVT;
14309           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14310             DoXform = false;
14311         }
14312       }
14313       if (DoXform)
14314         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14315                                           ISD::ZERO_EXTEND, SetCCs, TLI);
14316       if (DoXform) {
14317         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14318                                          LN00->getChain(), LN00->getBasePtr(),
14319                                          LN00->getMemoryVT(),
14320                                          LN00->getMemOperand());
14321         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14322         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14323                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
14324         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14325         bool NoReplaceTruncAnd = !N0.hasOneUse();
14326         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14327         CombineTo(N, And);
14328         // If N0 has multiple uses, change other uses as well.
14329         if (NoReplaceTruncAnd) {
14330           SDValue TruncAnd =
14331               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14332           CombineTo(N0.getNode(), TruncAnd);
14333         }
14334         if (NoReplaceTrunc) {
14335           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14336         } else {
14337           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14338                                       LN00->getValueType(0), ExtLoad);
14339           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14340         }
14341         return SDValue(N,0); // Return N so it doesn't get rechecked!
14342       }
14343     }
14344   }
14345 
14346   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14347   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14348   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14349     return ZExtLoad;
14350 
14351   // Try to simplify (zext (zextload x)).
14352   if (SDValue foldedExt = tryToFoldExtOfExtload(
14353           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14354     return foldedExt;
14355 
14356   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14357     return V;
14358 
14359   if (N0.getOpcode() == ISD::SETCC) {
14360     // Propagate fast-math-flags.
14361     SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14362 
14363     // Only do this before legalize for now.
14364     if (!LegalOperations && VT.isVector() &&
14365         N0.getValueType().getVectorElementType() == MVT::i1) {
14366       EVT N00VT = N0.getOperand(0).getValueType();
14367       if (getSetCCResultType(N00VT) == N0.getValueType())
14368         return SDValue();
14369 
14370       // We know that the # elements of the result is the same as the #
14371       // elements of the compare (and the # elements of the compare result for
14372       // that matter). Check to see that they are the same size. If so, we know
14373       // that the element size of the zext'd result matches the element size of
14374       // the compare operands.
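            // For example: (v4i32 (zext (v4i1 (setcc v4i32 a, v4i32 b)))) becomes
            // (zext_in_reg (v4i32 (setcc a, b))) when the target's setcc result
            // type for v4i32 operands is not v4i1.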
14375       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14376         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14377         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14378                                      N0.getOperand(1), N0.getOperand(2));
14379         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14380       }
14381 
14382       // If the desired elements are smaller or larger than the source
14383       // elements we can use a matching integer vector type and then
14384       // truncate/any extend followed by zext_in_reg.
14385       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14386       SDValue VsetCC =
14387           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14388                       N0.getOperand(1), N0.getOperand(2));
14389       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14390                                     N0.getValueType());
14391     }
14392 
14393     // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14394     EVT N0VT = N0.getValueType();
14395     EVT N00VT = N0.getOperand(0).getValueType();
14396     if (SDValue SCC = SimplifySelectCC(
14397             DL, N0.getOperand(0), N0.getOperand(1),
14398             DAG.getBoolConstant(true, DL, N0VT, N00VT),
14399             DAG.getBoolConstant(false, DL, N0VT, N00VT),
14400             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14401       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14402   }
14403 
14404   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
14405   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14406       !TLI.isZExtFree(N0, VT)) {
14407     SDValue ShVal = N0.getOperand(0);
14408     SDValue ShAmt = N0.getOperand(1);
14409     if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14410       if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14411         if (N0.getOpcode() == ISD::SHL) {
14412           // If the original shl may be shifting out bits, do not perform this
14413           // transformation.
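                // E.g. for (shl (zext i8 x to i16), c), the top 8 bits of the
                // shifted value are known zero, so any c <= 8 cannot shift out
                // set bits; larger shifts need the known-zeros check below.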
14414           unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14415                                    ShVal.getOperand(0).getValueSizeInBits();
14416           if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14417             // If the shift is too large, then see if we can deduce that the
14418             // shift is safe anyway.
14419 
14420             // Check if the bits being shifted out are known to be zero.
14421             KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
14422             if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
14423               return SDValue();
14424           }
14425         }
14426 
14427         // Ensure that the shift amount is wide enough for the shifted value.
14428         if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14429           ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14430 
14431         return DAG.getNode(N0.getOpcode(), DL, VT,
14432                            DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14433       }
14434     }
14435   }
14436 
14437   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14438     return NewVSel;
14439 
14440   if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14441     return NewCtPop;
14442 
14443   if (SDValue V = widenAbs(N, DAG))
14444     return V;
14445 
14446   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14447     return Res;
14448 
14449   // CSE zext nneg with sext if the zext is not free.
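        // A zext with the nneg flag produces the same value as a sext of the
        // same operand, so an equivalent existing sext node can be reused.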
14450   if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14451     SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14452     if (CSENode)
14453       return SDValue(CSENode, 0);
14454   }
14455 
14456   return SDValue();
14457 }
14458 
14459 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14460   SDValue N0 = N->getOperand(0);
14461   EVT VT = N->getValueType(0);
14462   SDLoc DL(N);
14463 
14464   // aext(undef) = undef
14465   if (N0.isUndef())
14466     return DAG.getUNDEF(VT);
14467 
14468   if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14469     return Res;
14470 
14471   // fold (aext (aext x)) -> (aext x)
14472   // fold (aext (zext x)) -> (zext x)
14473   // fold (aext (sext x)) -> (sext x)
14474   if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14475       N0.getOpcode() == ISD::SIGN_EXTEND) {
14476     SDNodeFlags Flags;
14477     if (N0.getOpcode() == ISD::ZERO_EXTEND)
14478       Flags.setNonNeg(N0->getFlags().hasNonNeg());
14479     return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14480   }
14481 
14482   // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14483   // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14484   // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14485   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14486       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14487       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14488     return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14489 
14490   // fold (aext (truncate (load x))) -> (aext (smaller load x))
14491   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14492   if (N0.getOpcode() == ISD::TRUNCATE) {
14493     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14494       SDNode *oye = N0.getOperand(0).getNode();
14495       if (NarrowLoad.getNode() != N0.getNode()) {
14496         CombineTo(N0.getNode(), NarrowLoad);
14497         // CombineTo deleted the truncate, if needed, but not what's under it.
14498         AddToWorklist(oye);
14499       }
14500       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14501     }
14502   }
14503 
14504   // fold (aext (truncate x))
14505   if (N0.getOpcode() == ISD::TRUNCATE)
14506     return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14507 
14508   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14509   // if the trunc is not free.
14510   if (N0.getOpcode() == ISD::AND &&
14511       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14512       N0.getOperand(1).getOpcode() == ISD::Constant &&
14513       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14514     SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14515     SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14516     assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14517     return DAG.getNode(ISD::AND, DL, VT, X, Y);
14518   }
14519 
14520   // fold (aext (load x)) -> (aext (truncate (extload x)))
14521   // None of the supported targets knows how to perform load and any_ext
14522   // on vectors in one instruction, so attempt to fold to zext instead.
14523   if (VT.isVector()) {
14524     // Try to simplify (zext (load x)).
14525     if (SDValue foldedExt =
14526             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14527                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14528       return foldedExt;
14529   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14530              ISD::isUNINDEXEDLoad(N0.getNode()) &&
14531              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14532     bool DoXform = true;
14533     SmallVector<SDNode *, 4> SetCCs;
14534     if (!N0.hasOneUse())
14535       DoXform =
14536           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14537     if (DoXform) {
14538       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14539       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14540                                        LN0->getBasePtr(), N0.getValueType(),
14541                                        LN0->getMemOperand());
14542       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14543       // If the load value is used only by N, replace it via CombineTo N.
14544       bool NoReplaceTrunc = N0.hasOneUse();
14545       CombineTo(N, ExtLoad);
14546       if (NoReplaceTrunc) {
14547         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14548         recursivelyDeleteUnusedNodes(LN0);
14549       } else {
14550         SDValue Trunc =
14551             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14552         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14553       }
14554       return SDValue(N, 0); // Return N so it doesn't get rechecked!
14555     }
14556   }
14557 
14558   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14559   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14560   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
14561   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14562       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14563     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14564     ISD::LoadExtType ExtType = LN0->getExtensionType();
14565     EVT MemVT = LN0->getMemoryVT();
14566     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14567       SDValue ExtLoad =
14568           DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14569                          MemVT, LN0->getMemOperand());
14570       CombineTo(N, ExtLoad);
14571       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14572       recursivelyDeleteUnusedNodes(LN0);
14573       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14574     }
14575   }
14576 
14577   if (N0.getOpcode() == ISD::SETCC) {
14578     // Propagate fast-math-flags.
14579     SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14580 
14581     // For vectors:
14582     // aext(setcc) -> vsetcc
14583     // aext(setcc) -> truncate(vsetcc)
14584     // aext(setcc) -> aext(vsetcc)
14585     // Only do this before legalize for now.
14586     if (VT.isVector() && !LegalOperations) {
14587       EVT N00VT = N0.getOperand(0).getValueType();
14588       if (getSetCCResultType(N00VT) == N0.getValueType())
14589         return SDValue();
14590 
14591       // We know that the # elements of the result is the same as the
14592       // # elements of the compare (and the # elements of the compare result
14593       // for that matter).  Check to see that they are the same size.  If so,
14594       // we know that the element size of the extended result matches the
14595       // element size of the compare operands.
14596       if (VT.getSizeInBits() == N00VT.getSizeInBits())
14597         return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14598                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
14599 
14600       // If the desired elements are smaller or larger than the source
14601       // elements we can use a matching integer vector type and then
14602       // truncate/any extend
14603       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14604       SDValue VsetCC = DAG.getSetCC(
14605           DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14606           cast<CondCodeSDNode>(N0.getOperand(2))->get());
14607       return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14608     }
14609 
14610     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14611     if (SDValue SCC = SimplifySelectCC(
14612             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14613             DAG.getConstant(0, DL, VT),
14614             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14615       return SCC;
14616   }
14617 
14618   if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14619     return NewCtPop;
14620 
14621   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14622     return Res;
14623 
14624   return SDValue();
14625 }
14626 
14627 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14628   unsigned Opcode = N->getOpcode();
14629   SDValue N0 = N->getOperand(0);
14630   SDValue N1 = N->getOperand(1);
14631   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14632 
14633   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14634   if (N0.getOpcode() == Opcode &&
14635       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14636     return N0;
14637 
14638   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14639       N0.getOperand(0).getOpcode() == Opcode) {
14640     // We have an assert, truncate, assert sandwich. Make one stronger assert
14641     // by applying the smaller of the two asserted types to the larger source
14642     // type. This eliminates the later assert:
14643     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14644     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14645     SDLoc DL(N);
14646     SDValue BigA = N0.getOperand(0);
14647     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14648     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14649     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14650     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14651                                     BigA.getOperand(0), MinAssertVTVal);
14652     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14653   }
14654 
14655   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) where Y is
14656   // smaller than X, just move the AssertZext in front of the truncate and
14657   // drop the AssertSext.
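        // For example:
        //   (AssertZext (truncate (AssertSext X, i32) to i16), i8)
        //     -> (truncate (AssertZext X, i8) to i16)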
14658   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14659       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14660       Opcode == ISD::AssertZext) {
14661     SDValue BigA = N0.getOperand(0);
14662     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14663     if (AssertVT.bitsLT(BigA_AssertVT)) {
14664       SDLoc DL(N);
14665       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14666                                       BigA.getOperand(0), N1);
14667       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14668     }
14669   }
14670 
14671   return SDValue();
14672 }
14673 
14674 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14675   SDLoc DL(N);
14676 
14677   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14678   SDValue N0 = N->getOperand(0);
14679 
14680   // Fold (assertalign (assertalign x, AL0), AL1) ->
14681   // (assertalign x, max(AL0, AL1))
14682   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14683     return DAG.getAssertAlign(DL, N0.getOperand(0),
14684                               std::max(AL, AAN->getAlign()));
14685 
14686   // In rare cases, there are trivial arithmetic ops in the source operands.
14687   // Sink this assert down to the source operands so that those arithmetic ops
14688   // can be exposed to DAG combining.
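        // For example, if %q is known to be 16-byte aligned:
        //   (assertalign (add %p, %q), 16) -> (add (assertalign %p, 16), %q)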
14689   switch (N0.getOpcode()) {
14690   default:
14691     break;
14692   case ISD::ADD:
14693   case ISD::SUB: {
14694     unsigned AlignShift = Log2(AL);
14695     SDValue LHS = N0.getOperand(0);
14696     SDValue RHS = N0.getOperand(1);
14697     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14698     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14699     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14700       if (LHSAlignShift < AlignShift)
14701         LHS = DAG.getAssertAlign(DL, LHS, AL);
14702       if (RHSAlignShift < AlignShift)
14703         RHS = DAG.getAssertAlign(DL, RHS, AL);
14704       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14705     }
14706     break;
14707   }
14708   }
14709 
14710   return SDValue();
14711 }
14712 
14713 /// If the result of a load is shifted/masked/truncated to an effectively
14714 /// narrower type, try to transform the load to a narrower type and/or
14715 /// use an extending load.
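      /// For example, on a little-endian target:
      ///   (i32 (truncate (srl (i64 (load %p)), 32))) -> (i32 (load %p + 4))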
14716 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14717   unsigned Opc = N->getOpcode();
14718 
14719   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14720   SDValue N0 = N->getOperand(0);
14721   EVT VT = N->getValueType(0);
14722   EVT ExtVT = VT;
14723 
14724   // This transformation isn't valid for vector loads.
14725   if (VT.isVector())
14726     return SDValue();
14727 
14728   // The ShAmt variable is used to indicate that we've consumed a right
14729   // shift. I.e. we want to narrow the width of the load by not loading the
14730   // ShAmt least significant bits.
14731   unsigned ShAmt = 0;
14732   // A special case is when the least significant bits from the load are masked
14733   // away, but using an AND rather than a right shift. ShiftedOffset is used to
14734   // indicate that the narrowed load should be left-shifted ShiftedOffset bits
14735   // to get the result.
14736   unsigned ShiftedOffset = 0;
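        // For example, on little-endian, (and (i32 (load %p)), 0xff00) can become
        // (shl (zextload i8 %p + 1 to i32), 8), i.e. ShAmt = 8 and
        // ShiftedOffset = 8, assuming the narrow extending load is legal.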
14737   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14738   // extended to VT.
14739   if (Opc == ISD::SIGN_EXTEND_INREG) {
14740     ExtType = ISD::SEXTLOAD;
14741     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14742   } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14743     // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14744     // value, or it may be shifting a higher subword, half or byte into the
14745     // lowest bits.
14746 
14747     // Only handle shift with constant shift amount, and the shiftee must be a
14748     // load.
14749     auto *LN = dyn_cast<LoadSDNode>(N0);
14750     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14751     if (!N1C || !LN)
14752       return SDValue();
14753     // If the shift amount is larger than the memory type then we're not
14754     // accessing any of the loaded bytes.
14755     ShAmt = N1C->getZExtValue();
14756     uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14757     if (MemoryWidth <= ShAmt)
14758       return SDValue();
14759     // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14760     ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14761     ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14762     // If original load is a SEXTLOAD then we can't simply replace it by a
14763     // If the original load is a SEXTLOAD then we can't simply replace it by a
14764     // ZEXTLOAD (we could potentially replace it by a narrower SEXTLOAD followed
14765     // by a ZEXT, but that is not handled at the moment). Similarly if
14766     if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14767          LN->getExtensionType() == ISD::ZEXTLOAD) &&
14768         LN->getExtensionType() != ExtType)
14769       return SDValue();
14770   } else if (Opc == ISD::AND) {
14771     // An AND with a constant mask is the same as a truncate + zero-extend.
14772     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14773     if (!AndC)
14774       return SDValue();
14775 
14776     const APInt &Mask = AndC->getAPIntValue();
14777     unsigned ActiveBits = 0;
14778     if (Mask.isMask()) {
14779       ActiveBits = Mask.countr_one();
14780     } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14781       ShiftedOffset = ShAmt;
14782     } else {
14783       return SDValue();
14784     }
14785 
14786     ExtType = ISD::ZEXTLOAD;
14787     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14788   }
14789 
14790   // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14791   // a right shift. Here we redo some of those checks, to possibly adjust the
14792   // ExtVT even further based on "a masking AND". We could also end up here for
14793   // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14794   // need to be done here as well.
14795   if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14796     SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14797     // Bail out when the SRL has more than one use. This is done for historical
14798     // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14799     // check below? And maybe it could be non-profitable to do the transform
14800     // when the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14801     // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14802     if (!SRL.hasOneUse())
14803       return SDValue();
14804 
14805     // Only handle shift with constant shift amount, and the shiftee must be a
14806     // load.
14807     auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14808     auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14809     if (!SRL1C || !LN)
14810       return SDValue();
14811 
14812     // If the shift amount is larger than the input type then we're not
14813     // accessing any of the loaded bytes.  If the load was a zextload/extload
14814     // then the result of the shift+trunc is zero/undef (handled elsewhere).
14815     ShAmt = SRL1C->getZExtValue();
14816     uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14817     if (ShAmt >= MemoryWidth)
14818       return SDValue();
14819 
14820     // Because a SRL must be assumed to *need* to zero-extend the high bits
14821     // (as opposed to anyext the high bits), we can't combine the zextload
14822     // lowering of SRL and an sextload.
14823     if (LN->getExtensionType() == ISD::SEXTLOAD)
14824       return SDValue();
14825 
14826     // Avoid reading outside the memory accessed by the original load (could
14827     // happen if we only adjusted the load base pointer by ShAmt). Instead we
14828     // try to narrow the load even further. The typical scenario here is:
14829     //   (i64 (truncate (i96 (srl (load x), 64)))) ->
14830     //     (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14831     if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14832       // Don't replace sextload by zextload.
14833       if (ExtType == ISD::SEXTLOAD)
14834         return SDValue();
14835       // Narrow the load.
14836       ExtType = ISD::ZEXTLOAD;
14837       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14838     }
14839 
14840     // If the SRL is only used by a masking AND, we may be able to adjust
14841     // the ExtVT to make the AND redundant.
14842     SDNode *Mask = *(SRL->user_begin());
14843     if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14844         isa<ConstantSDNode>(Mask->getOperand(1))) {
14845       unsigned Offset, ActiveBits;
14846       const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14847       if (ShiftMask.isMask()) {
14848         EVT MaskedVT =
14849             EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14850         // If the mask is smaller, recompute the type.
14851         if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14852             TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14853           ExtVT = MaskedVT;
14854       } else if (ExtType == ISD::ZEXTLOAD &&
14855                  ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14856                  (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14857         EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14858         // If the mask is shifted we can use a narrower load and a shl to insert
14859         // the trailing zeros.
14860         if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14861             TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14862           ExtVT = MaskedVT;
14863           ShAmt = Offset + ShAmt;
14864           ShiftedOffset = Offset;
14865         }
14866       }
14867     }
14868 
14869     N0 = SRL.getOperand(0);
14870   }
14871 
14872   // If the load is shifted left (and the result isn't shifted back right), we
14873   // can fold a truncate through the shift. The typical scenario is that N
14874   // points at a TRUNCATE here so the attempted fold is:
14875   //   (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14876   // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14877   unsigned ShLeftAmt = 0;
14878   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14879       ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
14880     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14881       ShLeftAmt = N01->getZExtValue();
14882       N0 = N0.getOperand(0);
14883     }
14884   }
14885 
14886   // If we haven't found a load, we can't narrow it.
14887   if (!isa<LoadSDNode>(N0))
14888     return SDValue();
14889 
14890   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14891   // Reducing the width of a volatile load is illegal.  For atomics, we may be
14892   // able to reduce the width provided we never widen again. (see D66309)
14893   if (!LN0->isSimple() ||
14894       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14895     return SDValue();
14896 
14897   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14898     unsigned LVTStoreBits =
14899         LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14900     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14901     return LVTStoreBits - EVTStoreBits - ShAmt;
14902   };
14903 
14904   // We need to adjust the pointer to the load by ShAmt bits in order to load
14905   // the correct bytes.
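        // For example, narrowing an i64 load to i16 with ShAmt = 16 adjusts the
        // pointer by 16/8 = 2 bytes on little-endian targets, but by
        // (64 - 16 - 16)/8 = 4 bytes on big-endian targets.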
14906   unsigned PtrAdjustmentInBits =
14907       DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14908 
14909   uint64_t PtrOff = PtrAdjustmentInBits / 8;
14910   SDLoc DL(LN0);
14911   // The original load itself didn't wrap, so an offset within it doesn't.
14912   SDValue NewPtr =
14913       DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
14914                                DL, SDNodeFlags::NoUnsignedWrap);
14915   AddToWorklist(NewPtr.getNode());
14916 
14917   SDValue Load;
14918   if (ExtType == ISD::NON_EXTLOAD)
14919     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14920                        LN0->getPointerInfo().getWithOffset(PtrOff),
14921                        LN0->getOriginalAlign(),
14922                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14923   else
14924     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14925                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14926                           LN0->getOriginalAlign(),
14927                           LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14928 
14929   // Replace the old load's chain with the new load's chain.
14930   WorklistRemover DeadNodes(*this);
14931   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14932 
14933   // Shift the result left, if we've swallowed a left shift.
14934   SDValue Result = Load;
14935   if (ShLeftAmt != 0) {
14936     // If the shift amount is as large as the result size (but, presumably,
14937     // no larger than the source) then the useful bits of the result are
14938     // zero; we can't simply return the shortened shift, because the result
14939     // of that operation is undefined.
14940     if (ShLeftAmt >= VT.getScalarSizeInBits())
14941       Result = DAG.getConstant(0, DL, VT);
14942     else
14943       Result = DAG.getNode(ISD::SHL, DL, VT, Result,
14944                            DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
14945   }
14946 
14947   if (ShiftedOffset != 0) {
14948     // We're using a shifted mask, so the load now has an offset. This means
14949     // that the data has been loaded into lower bytes of the register than it
14950     // otherwise would have been, so we need to shl the loaded data into the
14951     // correct position.
14952     SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14953     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14954     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14955   }
14956 
14957   // Return the new loaded value.
14958   return Result;
14959 }
14960 
14961 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14962   SDValue N0 = N->getOperand(0);
14963   SDValue N1 = N->getOperand(1);
14964   EVT VT = N->getValueType(0);
14965   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14966   unsigned VTBits = VT.getScalarSizeInBits();
14967   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14968   SDLoc DL(N);
14969 
14970   // sext_in_reg(undef) = 0 because the top bits will all be the same.
14971   if (N0.isUndef())
14972     return DAG.getConstant(0, DL, VT);
14973 
14974   // fold (sext_in_reg c1) -> c1
14975   if (SDValue C =
14976           DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
14977     return C;
14978 
14979   // If the input is already sign extended, just drop the extension.
14980   if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14981     return N0;
14982 
14983   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14984   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14985       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14986     return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
14987 
14988   // fold (sext_in_reg (sext x)) -> (sext x)
14989   // fold (sext_in_reg (aext x)) -> (sext x)
14990   // if x is small enough or if we know that x has more than 1 sign bit and the
14991   // sign_extend_inreg is extending from one of them.
14992   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14993     SDValue N00 = N0.getOperand(0);
14994     unsigned N00Bits = N00.getScalarValueSizeInBits();
14995     if ((N00Bits <= ExtVTBits ||
14996          DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14997         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14998       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
14999   }
15000 
15001   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15002   // if x is small enough or if we know that x has more than 1 sign bit and the
15003   // sign_extend_inreg is extending from one of them.
15004   if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15005     SDValue N00 = N0.getOperand(0);
15006     unsigned N00Bits = N00.getScalarValueSizeInBits();
15007     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
15008     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
15009     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15010     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
15011     if ((N00Bits == ExtVTBits ||
15012          (!IsZext && (N00Bits < ExtVTBits ||
15013                       DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15014         (!LegalOperations ||
15015          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15016       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15017   }
15018 
15019   // fold (sext_in_reg (zext x)) -> (sext x)
15020   // iff we are extending the source sign bit.
15021   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15022     SDValue N00 = N0.getOperand(0);
15023     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15024         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15025       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15026   }
15027 
15028   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15029   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15030     return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15031 
15032   // fold operands of sext_in_reg based on knowledge that the top bits are not
15033   // demanded.
15034   if (SimplifyDemandedBits(SDValue(N, 0)))
15035     return SDValue(N, 0);
15036 
15037   // fold (sext_in_reg (load x)) -> (smaller sextload x)
15038   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15039   if (SDValue NarrowLoad = reduceLoadWidth(N))
15040     return NarrowLoad;
15041 
15042   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15043   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15044   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15045   if (N0.getOpcode() == ISD::SRL) {
15046     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15047       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15048         // We can turn this into an SRA iff the input to the SRL is already sign
15049         // extended enough.
15050         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15051         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15052           return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15053                              N0.getOperand(1));
15054       }
15055   }
15056 
15057   // fold (sext_inreg (extload x)) -> (sextload x)
15058   // If sextload is not supported by the target, we can only do the combine
15059   // when the load has one use. Doing otherwise can block folding the extload
15060   // with other extends that the target does support.
15061   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15062       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15063       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15064         N0.hasOneUse()) ||
15065        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15066     auto *LN0 = cast<LoadSDNode>(N0);
15067     SDValue ExtLoad =
15068         DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15069                        LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15070     CombineTo(N, ExtLoad);
15071     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15072     AddToWorklist(ExtLoad.getNode());
15073     return SDValue(N, 0); // Return N so it doesn't get rechecked!
15074   }
15075 
15076   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15077   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15078       N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15079       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15080        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15081     auto *LN0 = cast<LoadSDNode>(N0);
15082     SDValue ExtLoad =
15083         DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15084                        LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15085     CombineTo(N, ExtLoad);
15086     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15087     return SDValue(N, 0); // Return N so it doesn't get rechecked!
15088   }
15089 
15090   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15091   // Ignore it if the masked load is already sign extended.
15092   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15093     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15094         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15095         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15096       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15097           VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15098           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15099           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15100       CombineTo(N, ExtMaskedLoad);
15101       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15102       return SDValue(N, 0); // Return N so it doesn't get rechecked!
15103     }
15104   }
15105 
15106   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15107   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15108     if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15109         TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
15110       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
15111                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
15112 
15113       SDValue ExtLoad = DAG.getMaskedGather(
15114           DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15115           GN0->getIndexType(), ISD::SEXTLOAD);
15116 
15117       CombineTo(N, ExtLoad);
15118       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15119       AddToWorklist(ExtLoad.getNode());
15120       return SDValue(N, 0); // Return N so it doesn't get rechecked!
15121     }
15122   }
15123 
15124   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15125   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15126     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15127                                            N0.getOperand(1), false))
15128       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15129   }
15130 
15131   // Fold (iM_signext_inreg
15132   //        (extract_subvector (zext|anyext|sext iN_v to _) _)
15133   //        from iN)
15134   //      -> (extract_subvector (signext iN_v to iM))
15135   if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15136       ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
15137     SDValue InnerExt = N0.getOperand(0);
15138     EVT InnerExtVT = InnerExt->getValueType(0);
15139     SDValue Extendee = InnerExt->getOperand(0);
15140 
15141     if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15142         (!LegalOperations ||
15143          TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15144       SDValue SignExtExtendee =
15145           DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15146       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15147                          N0.getOperand(1));
15148     }
15149   }
15150 
15151   return SDValue();
15152 }
15153 
15154 static SDValue foldExtendVectorInregToExtendOfSubvector(
15155     SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15156     bool LegalOperations) {
15157   unsigned InregOpcode = N->getOpcode();
15158   unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15159 
15160   SDValue Src = N->getOperand(0);
15161   EVT VT = N->getValueType(0);
15162   EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15163                                Src.getValueType().getVectorElementType(),
15164                                VT.getVectorElementCount());
15165 
15166   assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15167          "Expected EXTEND_VECTOR_INREG dag node in input!");
15168 
15169   // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15170   // FIXME: one-use check may be overly restrictive
15171   if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15172     return SDValue();
15173 
15174   // Profitability check: we must be extending exactly one of its operands.
15175   // FIXME: this is probably overly restrictive.
15176   Src = Src.getOperand(0);
15177   if (Src.getValueType() != SrcVT)
15178     return SDValue();
15179 
15180   if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15181     return SDValue();
15182 
15183   return DAG.getNode(Opcode, DL, VT, Src);
15184 }
15185 
15186 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15187   SDValue N0 = N->getOperand(0);
15188   EVT VT = N->getValueType(0);
15189   SDLoc DL(N);
15190 
15191   if (N0.isUndef()) {
15192     // aext_vector_inreg(undef) = undef because the top bits are undefined.
15193     // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15194     return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15195                ? DAG.getUNDEF(VT)
15196                : DAG.getConstant(0, DL, VT);
15197   }
15198 
15199   if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15200     return Res;
15201 
15202   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
15203     return SDValue(N, 0);
15204 
15205   if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
15206                                                            LegalOperations))
15207     return R;
15208 
15209   return SDValue();
15210 }
15211 
15212 SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
15213   EVT VT = N->getValueType(0);
15214   SDValue N0 = N->getOperand(0);
15215 
15216   SDValue FPVal;
15217   if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
15218       DAG.getTargetLoweringInfo().shouldConvertFpToSat(
15219           ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
15220     return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
15221                        DAG.getValueType(VT.getScalarType()));
15222 
15223   return SDValue();
15224 }
15225 
15226 /// Detect patterns of truncation with unsigned saturation:
15227 ///
15228 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
15229 /// Return the source value x to be truncated or SDValue() if the pattern was
15230 /// not matched.
15231 ///
15232 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
15233   unsigned NumDstBits = VT.getScalarSizeInBits();
15234   unsigned NumSrcBits = In.getScalarValueSizeInBits();
15235   // Saturation with truncation. We truncate from InVT to VT.
15236   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15237 
15238   SDValue Min;
15239   APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15240   if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
15241     return Min;
15242 
15243   return SDValue();
15244 }
15245 
15246 /// Detect patterns of truncation with signed saturation:
15247 /// (truncate (smin (smax (x, signed_min_of_dest_type),
15248 ///                  signed_max_of_dest_type)) to dest_type)
15249 /// or:
15250 /// (truncate (smax (smin (x, signed_max_of_dest_type),
15251 ///                  signed_min_of_dest_type)) to dest_type).
15252 ///
15253 /// Return the source value to be truncated or SDValue() if the pattern was not
15254 /// matched.
15255 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
15256   unsigned NumDstBits = VT.getScalarSizeInBits();
15257   unsigned NumSrcBits = In.getScalarValueSizeInBits();
15258   // Saturation with truncation. We truncate from InVT to VT.
15259   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15260 
15261   SDValue Val;
15262   APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
15263   APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
15264 
15265   if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
15266                           m_SpecificInt(SignedMax))))
15267     return Val;
15268 
15269   if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
15270                           m_SpecificInt(SignedMin))))
15271     return Val;
15272 
15273   return SDValue();
15274 }
15275 
15276 /// Detect patterns of truncation with signed saturation to an unsigned range,
      /// i.e. the value is clamped between 0 and the unsigned max of the
      /// destination type:
      /// (truncate (smax (smin (x, unsigned_max_of_dest_type), 0)) to dest_type)
      /// or equivalent forms with the clamps reordered, or with umin as the upper
      /// clamp. Return the source value to be truncated or SDValue() if the
      /// pattern was not matched.
15277 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
15278                                   const SDLoc &DL) {
15279   unsigned NumDstBits = VT.getScalarSizeInBits();
15280   unsigned NumSrcBits = In.getScalarValueSizeInBits();
15281   // Saturation with truncation. We truncate from InVT to VT.
15282   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15283 
15284   SDValue Val;
15285   APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15286   // Min == 0, Max is unsigned max of destination type.
15287   if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
15288                           m_Zero())))
15289     return Val;
15290 
15291   if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
15292                           m_SpecificInt(UnsignedMax))))
15293     return Val;
15294 
15295   if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
15296                           m_SpecificInt(UnsignedMax))))
15297     return Val;
15298 
15299   return SDValue();
15300 }
15301 
15302 static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
15303                                SDLoc &DL, const TargetLowering &TLI,
15304                                SelectionDAG &DAG) {
15305   auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
15306     return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
15307             TLI.isTypeDesirableForOp(Opc, VT));
15308   };
15309 
15310   if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
15311     if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
15312       if (SDValue SSatVal = detectSSatSPattern(Src, VT))
15313         return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
15314     if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15315       if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15316         return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15317   } else if (Src.getOpcode() == ISD::UMIN) {
15318     if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15319       if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15320         return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15321     if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
15322       if (SDValue USatVal = detectUSatUPattern(Src, VT))
15323         return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
15324   }
15325 
15326   return SDValue();
15327 }
15328 
15329 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
15330   SDValue N0 = N->getOperand(0);
15331   EVT VT = N->getValueType(0);
15332   EVT SrcVT = N0.getValueType();
15333   bool isLE = DAG.getDataLayout().isLittleEndian();
15334   SDLoc DL(N);
15335 
15336   // trunc(undef) = undef
15337   if (N0.isUndef())
15338     return DAG.getUNDEF(VT);
15339 
15340   // fold (truncate (truncate x)) -> (truncate x)
15341   if (N0.getOpcode() == ISD::TRUNCATE)
15342     return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15343 
15344   // fold saturated truncate
15345   if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
15346     return SaturatedTR;
15347 
15348   // fold (truncate c1) -> c1
15349   if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
15350     return C;
15351 
15352   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
15353   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
15354       N0.getOpcode() == ISD::SIGN_EXTEND ||
15355       N0.getOpcode() == ISD::ANY_EXTEND) {
15356     // if the source is smaller than the dest, we still need an extend.
15357     if (N0.getOperand(0).getValueType().bitsLT(VT))
15358       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15359     // if the source is larger than the dest, then we just need the truncate.
15360     if (N0.getOperand(0).getValueType().bitsGT(VT))
15361       return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15362     // if the source and dest are the same type, we can drop both the extend
15363     // and the truncate.
15364     return N0.getOperand(0);
15365   }
15366 
15367   // Try to narrow a truncate-of-sext_in_reg to the destination type:
15368   // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
15369   if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15370       N0.hasOneUse()) {
15371     SDValue X = N0.getOperand(0);
15372     SDValue ExtVal = N0.getOperand(1);
15373     EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
15374     if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
15375       SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
15376       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
15377     }
15378   }
15379 
15380   // If this is anyext(trunc), don't fold it; allow ourselves to be folded.
15381   if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
15382     return SDValue();
15383 
15384   // Fold extract-and-trunc into a narrow extract. For example:
15385   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
15386   //   i32 y = TRUNCATE(i64 x)
15387   //        -- becomes --
15388   //   v16i8 b = BITCAST (v2i64 val)
15389   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
15390   //
15391   // Note: We only run this optimization after type legalization (which often
15392   // creates this pattern) and before operation legalization, after which
15393   // we need to be more careful about the vector instructions that we generate.
15394   if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
15395       N0->hasOneUse()) {
15396     EVT TrTy = N->getValueType(0);
15397     SDValue Src = N0;
15398 
15399     // Check for cases where we shift down an upper element before truncation.
15400     int EltOffset = 0;
15401     if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
15402       if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
15403         if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
15404           Src = Src.getOperand(0);
15405           EltOffset = *ShAmt / TrTy.getSizeInBits();
15406         }
15407       }
15408     }
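    // Illustrative little-endian example: for
    //   (i32 (trunc (srl (i64 (extract_vector_elt v2i64:val, 0)), 32)))
    // the shift spans exactly one i32 element, so EltOffset becomes 1 and the
    // code below extracts element 0 * 2 + 1 = 1 of the v4i32 bitcast.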
15409 
15410     if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15411       EVT VecTy = Src.getOperand(0).getValueType();
15412       EVT ExTy = Src.getValueType();
15413 
15414       auto EltCnt = VecTy.getVectorElementCount();
15415       unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
15416       auto NewEltCnt = EltCnt * SizeRatio;
15417 
15418       EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
15419       assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
15420 
15421       SDValue EltNo = Src->getOperand(1);
15422       if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
15423         int Elt = EltNo->getAsZExtVal();
15424         int Index = isLE ? (Elt * SizeRatio + EltOffset)
15425                          : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
15426         return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
15427                            DAG.getBitcast(NVT, Src.getOperand(0)),
15428                            DAG.getVectorIdxConstant(Index, DL));
15429       }
15430     }
15431   }
15432 
15433   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
15434   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
15435       TLI.isTruncateFree(SrcVT, VT)) {
15436     if (!LegalOperations ||
15437         (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
15438          TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
15439       SDLoc SL(N0);
15440       SDValue Cond = N0.getOperand(0);
15441       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
15442       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
15443       return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
15444     }
15445   }
15446 
15447   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15448   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15449       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
15450       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
15451     SDValue Amt = N0.getOperand(1);
15452     KnownBits Known = DAG.computeKnownBits(Amt);
15453     unsigned Size = VT.getScalarSizeInBits();
15454     if (Known.countMaxActiveBits() <= Log2_32(Size)) {
15455       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
15456       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15457       if (AmtVT != Amt.getValueType()) {
15458         Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
15459         AddToWorklist(Amt.getNode());
15460       }
15461       return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
15462     }
15463   }
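  // For example, (i32 (trunc (shl i64:x, 5))) can become
  // (shl (i32 (trunc x)), 5): truncation commutes with the shift as long as
  // the amount is known to be below the narrow bit width (here 5 < 32).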
15464 
15465   if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
15466     return V;
15467 
15468   if (SDValue ABD = foldABSToABD(N, DL))
15469     return ABD;
15470 
15471   // Attempt to pre-truncate BUILD_VECTOR sources.
15472   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
15473       N0.hasOneUse() &&
15474       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
15475       // Avoid creating illegal types if running after type legalizer.
15476       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
15477     EVT SVT = VT.getScalarType();
15478     SmallVector<SDValue, 8> TruncOps;
15479     for (const SDValue &Op : N0->op_values()) {
15480       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
15481       TruncOps.push_back(TruncOp);
15482     }
15483     return DAG.getBuildVector(VT, DL, TruncOps);
15484   }
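  // E.g. (v4i16 (trunc (v4i32 (build_vector a, b, c, d)))) becomes
  // (v4i16 (build_vector (trunc a), (trunc b), (trunc c), (trunc d)))
  // when the scalar truncates are free.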
15485 
15486   // trunc (splat_vector x) -> splat_vector (trunc x)
15487   if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
15488       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
15489       (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
15490     EVT SVT = VT.getScalarType();
15491     return DAG.getSplatVector(
15492         VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15493   }
15494 
15495   // Fold a series of buildvector, bitcast, and truncate if possible.
15496   // For example fold
15497   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
15498   //   (2xi32 (buildvector x, y)).
15499   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
15500       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15501       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15502       N0.getOperand(0).hasOneUse()) {
15503     SDValue BuildVect = N0.getOperand(0);
15504     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
15505     EVT TruncVecEltTy = VT.getVectorElementType();
15506 
15507     // Check that the element types match.
15508     if (BuildVectEltTy == TruncVecEltTy) {
15509       // Now we only need to compute the offset of the truncated elements.
15510       unsigned BuildVecNumElts = BuildVect.getNumOperands();
15511       unsigned TruncVecNumElts = VT.getVectorNumElements();
15512       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15513       unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
15514 
15515       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15516              "Invalid number of elements");
15517 
15518       SmallVector<SDValue, 8> Opnds;
15519       for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
15520            i += TruncEltOffset)
15521         Opnds.push_back(BuildVect.getOperand(i));
15522 
15523       return DAG.getBuildVector(VT, DL, Opnds);
15524     }
15525   }
15526 
15527   // fold (truncate (load x)) -> (smaller load x)
15528   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15529   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15530     if (SDValue Reduced = reduceLoadWidth(N))
15531       return Reduced;
15532 
15533     // Handle the case where the truncated result is at least as wide as the
15534     // loaded type.
15535     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15536       auto *LN0 = cast<LoadSDNode>(N0);
15537       if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15538         SDValue NewLoad = DAG.getExtLoad(
15539             LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15540             LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15541         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15542         return NewLoad;
15543       }
15544     }
15545   }
15546 
15547   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
15548   // where ... are all 'undef'.
15549   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15550     SmallVector<EVT, 8> VTs;
15551     SDValue V;
15552     unsigned Idx = 0;
15553     unsigned NumDefs = 0;
15554 
15555     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15556       SDValue X = N0.getOperand(i);
15557       if (!X.isUndef()) {
15558         V = X;
15559         Idx = i;
15560         NumDefs++;
15561       }
15562       // Stop if more than one member is non-undef.
15563       if (NumDefs > 1)
15564         break;
15565 
15566       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
15567                                      VT.getVectorElementType(),
15568                                      X.getValueType().getVectorElementCount()));
15569     }
15570 
15571     if (NumDefs == 0)
15572       return DAG.getUNDEF(VT);
15573 
15574     if (NumDefs == 1) {
15575       assert(V.getNode() && "The single defined operand is empty!");
15576       SmallVector<SDValue, 8> Opnds;
15577       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15578         if (i != Idx) {
15579           Opnds.push_back(DAG.getUNDEF(VTs[i]));
15580           continue;
15581         }
15582         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15583         AddToWorklist(NV.getNode());
15584         Opnds.push_back(NV);
15585       }
15586       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15587     }
15588   }
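  // E.g. with a single defined operand,
  //   (v8i16 (trunc (concat_vectors v4i32:undef, v4i32:x)))
  // becomes (concat_vectors (v4i16 undef), (v4i16 (trunc x))).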
15589 
15590   // Fold truncate of a bitcast of a vector to an extract of the low vector
15591   // element.
15592   //
15593   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15594   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15595     SDValue VecSrc = N0.getOperand(0);
15596     EVT VecSrcVT = VecSrc.getValueType();
15597     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15598         (!LegalOperations ||
15599          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15600       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15601       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15602                          DAG.getVectorIdxConstant(Idx, DL));
15603     }
15604   }
15605 
15606   // Simplify the operands using demanded-bits information.
15607   if (SimplifyDemandedBits(SDValue(N, 0)))
15608     return SDValue(N, 0);
15609 
15610   // fold (truncate (extract_subvector(ext x))) ->
15611   //      (extract_subvector x)
15612   // TODO: This can be generalized to cover cases where the truncate and extract
15613   // do not fully cancel each other out.
15614   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15615     SDValue N00 = N0.getOperand(0);
15616     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15617         N00.getOpcode() == ISD::ZERO_EXTEND ||
15618         N00.getOpcode() == ISD::ANY_EXTEND) {
15619       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15620           VT.getVectorElementType())
15621         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15622                            N00.getOperand(0), N0.getOperand(1));
15623     }
15624   }
15625 
15626   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15627     return NewVSel;
15628 
15629   // Narrow a suitable binary operation with a non-opaque constant operand by
15630   // moving it ahead of the truncate. This is limited to pre-legalization
15631   // because targets may prefer a wider type during later combines and invert
15632   // this transform.
15633   switch (N0.getOpcode()) {
15634   case ISD::ADD:
15635   case ISD::SUB:
15636   case ISD::MUL:
15637   case ISD::AND:
15638   case ISD::OR:
15639   case ISD::XOR:
15640     if (!LegalOperations && N0.hasOneUse() &&
15641         (isConstantOrConstantVector(N0.getOperand(0), true) ||
15642          isConstantOrConstantVector(N0.getOperand(1), true))) {
15643       // TODO: We already restricted this to pre-legalization, but for vectors
15644       // we are extra cautious to not create an unsupported operation.
15645       // Target-specific changes are likely needed to avoid regressions here.
15646       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15647         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15648         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15649         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15650       }
15651     }
15652     break;
15653   case ISD::ADDE:
15654   case ISD::UADDO_CARRY:
15655     // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15656     // (trunc uaddo_carry(X, Y, Carry)) ->
15657     //     (uaddo_carry trunc(X), trunc(Y), Carry)
15658     // This applies when the carry output is not used.
15659     // For uaddo_carry we only do this before operation legalization.
15660     if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15661          TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15662         N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15663       SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15664       SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15665       SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15666       return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15667     }
15668     break;
15669   case ISD::USUBSAT:
15670     // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15671     // enough to know that the upper bits are zero, we must also ensure that
15672     // we don't introduce an extra truncate.
15673     if (!LegalOperations && N0.hasOneUse() &&
15674         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15675         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15676             VT.getScalarSizeInBits() &&
15677         hasOperation(N0.getOpcode(), VT)) {
15678       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15679                                  DAG, DL);
15680     }
15681     break;
15682   }
15683 
15684   return SDValue();
15685 }
15686 
15687 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15688   SDValue Elt = N->getOperand(i);
15689   if (Elt.getOpcode() != ISD::MERGE_VALUES)
15690     return Elt.getNode();
15691   return Elt.getOperand(Elt.getResNo()).getNode();
15692 }
15693 
15694 /// build_pair (load, load) -> load
15695 /// if load locations are consecutive.
15696 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15697   assert(N->getOpcode() == ISD::BUILD_PAIR);
15698 
15699   auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15700   auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15701 
15702   // A BUILD_PAIR always has the least significant part in elt 0 and the
15703   // most significant part in elt 1, so when combining into one large load we
15704   // need to consider the endianness.
15705   if (DAG.getDataLayout().isBigEndian())
15706     std::swap(LD1, LD2);
15707 
15708   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15709       !LD1->hasOneUse() || !LD2->hasOneUse() ||
15710       LD1->getAddressSpace() != LD2->getAddressSpace())
15711     return SDValue();
15712 
15713   unsigned LD1Fast = 0;
15714   EVT LD1VT = LD1->getValueType(0);
15715   unsigned LD1Bytes = LD1VT.getStoreSize();
15716   if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15717       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15718       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15719                              *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15720     return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15721                        LD1->getPointerInfo(), LD1->getAlign());
15722 
15723   return SDValue();
15724 }
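// Illustrative little-endian case: if elt 0 of the BUILD_PAIR is an i32 load
// from address p and elt 1 is an i32 load from p + 4, the pair can be
// replaced by a single i64 load from p, provided the wide access is legal
// and fast enough according to allowsMemoryAccess.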
15725 
15726 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15727   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15728   // and Lo parts; on big-endian machines it doesn't.
15729   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15730 }
15731 
15732 SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15733                                           const TargetLowering &TLI) {
15734   // If this is not a bitcast to an FP type or if the target doesn't have
15735   // IEEE754-compliant FP logic, we're done.
15736   EVT VT = N->getValueType(0);
15737   SDValue N0 = N->getOperand(0);
15738   EVT SourceVT = N0.getValueType();
15739 
15740   if (!VT.isFloatingPoint())
15741     return SDValue();
15742 
15743   // TODO: Handle cases where the integer constant is a different scalar
15744   // bitwidth to the FP.
15745   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15746     return SDValue();
15747 
15748   unsigned FPOpcode;
15749   APInt SignMask;
15750   switch (N0.getOpcode()) {
15751   case ISD::AND:
15752     FPOpcode = ISD::FABS;
15753     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15754     break;
15755   case ISD::XOR:
15756     FPOpcode = ISD::FNEG;
15757     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15758     break;
15759   case ISD::OR:
15760     FPOpcode = ISD::FABS;
15761     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15762     break;
15763   default:
15764     return SDValue();
15765   }
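  // For f32, for instance, SignMask is 0x7FFFFFFF in the AND case (clear the
  // sign bit, i.e. fabs) and 0x80000000 in the XOR and OR cases (flip or set
  // the sign bit, i.e. fneg or fneg (fabs)).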
15766 
15767   if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15768     return SDValue();
15769 
15770   // This needs to be the inverse of logic in foldSignChangeInBitcast.
15771   // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15772   // removing this would require more changes.
15773   auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15774     if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
15775       return true;
15776 
15777     return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15778   };
15779 
15780   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15781   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15782   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15783   //   fneg (fabs X)
15784   SDValue LogicOp0 = N0.getOperand(0);
15785   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15786   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15787       IsBitCastOrFree(LogicOp0, VT)) {
15788     SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15789     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15790     NumFPLogicOpsConv++;
15791     if (N0.getOpcode() == ISD::OR)
15792       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15793     return FPOp;
15794   }
15795 
15796   return SDValue();
15797 }
15798 
15799 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15800   SDValue N0 = N->getOperand(0);
15801   EVT VT = N->getValueType(0);
15802 
15803   if (N0.isUndef())
15804     return DAG.getUNDEF(VT);
15805 
15806   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15807   // Only do this before legalize types, unless both types are integer and the
15808   // scalar type is legal. Only do this before legalize ops, since the target
15809   // may be depending on the bitcast.
15810   // First check to see if this is all constant.
15811   // TODO: Support FP bitcasts after legalize types.
15812   if (VT.isVector() &&
15813       (!LegalTypes ||
15814        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15815         TLI.isTypeLegal(VT.getVectorElementType()))) &&
15816       N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15817       cast<BuildVectorSDNode>(N0)->isConstant())
15818     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15819                                              VT.getVectorElementType());
15820 
15821   // If the input is a constant, let getNode fold it.
15822   if (isIntOrFPConstant(N0)) {
15823     // If we can't allow illegal operations, we need to check that this is just
15824     // an fp -> int or int -> fp conversion and that the resulting
15825     // operation will be legal.
15826     if (!LegalOperations ||
15827         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15828          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15829         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15830          TLI.isOperationLegal(ISD::Constant, VT))) {
15831       SDValue C = DAG.getBitcast(VT, N0);
15832       if (C.getNode() != N)
15833         return C;
15834     }
15835   }
15836 
15837   // (conv (conv x, t1), t2) -> (conv x, t2)
15838   if (N0.getOpcode() == ISD::BITCAST)
15839     return DAG.getBitcast(VT, N0.getOperand(0));
15840 
15841   // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15842   // iff the current bitwise logicop type isn't legal
15843   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15844       !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15845     auto IsFreeBitcast = [VT](SDValue V) {
15846       return (V.getOpcode() == ISD::BITCAST &&
15847               V.getOperand(0).getValueType() == VT) ||
15848              (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15849               V->hasOneUse());
15850     };
15851     if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15852       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15853                          DAG.getBitcast(VT, N0.getOperand(0)),
15854                          DAG.getBitcast(VT, N0.getOperand(1)));
15855   }
15856 
15857   // fold (conv (load x)) -> (load (conv*)x)
15858   // If the resultant load doesn't need a higher alignment than the original!
15859   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15860       // Do not remove the cast if the types differ in endian layout.
15861       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15862           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15863       // If the load is volatile, we only want to change the load type if the
15864       // resulting load is legal. Otherwise we might increase the number of
15865       // memory accesses. We don't care if the original type was legal or not
15866       // as we assume software couldn't rely on the number of accesses of an
15867       // illegal type.
15868       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15869        TLI.isOperationLegal(ISD::LOAD, VT))) {
15870     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15871 
15872     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15873                                     *LN0->getMemOperand())) {
15874       SDValue Load =
15875           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15876                       LN0->getMemOperand());
15877       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15878       return Load;
15879     }
15880   }
15881 
15882   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15883     return V;
15884 
15885   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15886   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15887   //
15888   // For ppc_fp128:
15889   // fold (bitcast (fneg x)) ->
15890   //     flipbit = signbit
15891   //     (xor (bitcast x) (build_pair flipbit, flipbit))
15892   //
15893   // fold (bitcast (fabs x)) ->
15894   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
15895   //     (xor (bitcast x) (build_pair flipbit, flipbit))
15896   // This often reduces constant pool loads.
15897   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15898        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15899       N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15900       !N0.getValueType().isVector()) {
15901     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15902     AddToWorklist(NewConv.getNode());
15903 
15904     SDLoc DL(N);
15905     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15906       assert(VT.getSizeInBits() == 128);
15907       SDValue SignBit = DAG.getConstant(
15908           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15909       SDValue FlipBit;
15910       if (N0.getOpcode() == ISD::FNEG) {
15911         FlipBit = SignBit;
15912         AddToWorklist(FlipBit.getNode());
15913       } else {
15914         assert(N0.getOpcode() == ISD::FABS);
15915         SDValue Hi =
15916             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15917                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15918                                               SDLoc(NewConv)));
15919         AddToWorklist(Hi.getNode());
15920         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15921         AddToWorklist(FlipBit.getNode());
15922       }
15923       SDValue FlipBits =
15924           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15925       AddToWorklist(FlipBits.getNode());
15926       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15927     }
15928     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15929     if (N0.getOpcode() == ISD::FNEG)
15930       return DAG.getNode(ISD::XOR, DL, VT,
15931                          NewConv, DAG.getConstant(SignBit, DL, VT));
15932     assert(N0.getOpcode() == ISD::FABS);
15933     return DAG.getNode(ISD::AND, DL, VT,
15934                        NewConv, DAG.getConstant(~SignBit, DL, VT));
15935   }
15936 
15937   // fold (bitconvert (fcopysign cst, x)) ->
15938   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
15939   // Note that we don't handle (copysign x, cst) because this can always be
15940   // folded to an fneg or fabs.
15941   //
15942   // For ppc_fp128:
15943   // fold (bitcast (fcopysign cst, x)) ->
15944   //     flipbit = (and (extract_element
15945   //                     (xor (bitcast cst), (bitcast x)), 0),
15946   //                    signbit)
15947   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
15948   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15949       isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15950       !VT.isVector()) {
15951     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15952     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15953     if (isTypeLegal(IntXVT)) {
15954       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15955       AddToWorklist(X.getNode());
15956 
15957       // If X has a different width than the result/lhs, sext it or truncate it.
15958       unsigned VTWidth = VT.getSizeInBits();
15959       if (OrigXWidth < VTWidth) {
15960         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15961         AddToWorklist(X.getNode());
15962       } else if (OrigXWidth > VTWidth) {
15963         // To get the sign bit in the right place, we have to shift it right
15964         // before truncating.
15965         SDLoc DL(X);
15966         X = DAG.getNode(ISD::SRL, DL,
15967                         X.getValueType(), X,
15968                         DAG.getConstant(OrigXWidth-VTWidth, DL,
15969                                         X.getValueType()));
15970         AddToWorklist(X.getNode());
15971         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15972         AddToWorklist(X.getNode());
15973       }
15974 
15975       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15976         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15977         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15978         AddToWorklist(Cst.getNode());
15979         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15980         AddToWorklist(X.getNode());
15981         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15982         AddToWorklist(XorResult.getNode());
15983         SDValue XorResult64 = DAG.getNode(
15984             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15985             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15986                                   SDLoc(XorResult)));
15987         AddToWorklist(XorResult64.getNode());
15988         SDValue FlipBit =
15989             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15990                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15991         AddToWorklist(FlipBit.getNode());
15992         SDValue FlipBits =
15993             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15994         AddToWorklist(FlipBits.getNode());
15995         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15996       }
15997       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15998       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15999                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
16000       AddToWorklist(X.getNode());
16001 
16002       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16003       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16004                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16005       AddToWorklist(Cst.getNode());
16006 
16007       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16008     }
16009   }
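  // E.g. (i32 (bitcast (fcopysign 1.0f, f32:x))) becomes
  // (or (and (bitcast x), 0x80000000), 0x3F800000), since the bits of 1.0f
  // are 0x3F800000 and only the sign bit is taken from x.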
16010 
16011   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16012   if (N0.getOpcode() == ISD::BUILD_PAIR)
16013     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16014       return CombineLD;
16015 
16016   // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16017   //   => int_vt (any_extend elt_vt:x)
16018   if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16019     SDValue SrcScalar = N0.getOperand(0);
16020     if (SrcScalar.getValueType().isScalarInteger())
16021       return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16022   }
16023 
16024   // Remove double bitcasts from shuffles - this is often a legacy of
16025   // XformToShuffleWithZero being used to combine bitmaskings (of
16026   // float vectors bitcast to integer vectors) into shuffles.
16027   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16028   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16029       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16030       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16031       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16032     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16033 
16034     // If operands are a bitcast, peek through if it casts the original VT.
16035     // If operands are a constant, just bitcast back to original VT.
16036     auto PeekThroughBitcast = [&](SDValue Op) {
16037       if (Op.getOpcode() == ISD::BITCAST &&
16038           Op.getOperand(0).getValueType() == VT)
16039         return SDValue(Op.getOperand(0));
16040       if (Op.isUndef() || isAnyConstantBuildVector(Op))
16041         return DAG.getBitcast(VT, Op);
16042       return SDValue();
16043     };
16044 
16045     // FIXME: If either input vector is bitcast, try to convert the shuffle to
16046     // the result type of this bitcast. This would eliminate at least one
16047     // bitcast. See the transform in InstCombine.
16048     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16049     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16050     if (!(SV0 && SV1))
16051       return SDValue();
16052 
16053     int MaskScale =
16054         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16055     SmallVector<int, 8> NewMask;
16056     for (int M : SVN->getMask())
16057       for (int i = 0; i != MaskScale; ++i)
16058         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
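    // E.g. scaling a v2i64 shuffle mask <1, 0> up to v4i32 (MaskScale == 2)
    // yields the mask <2, 3, 0, 1>.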
16059 
16060     SDValue LegalShuffle =
16061         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16062     if (LegalShuffle)
16063       return LegalShuffle;
16064   }
16065 
16066   return SDValue();
16067 }
16068 
16069 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16070   EVT VT = N->getValueType(0);
16071   return CombineConsecutiveLoads(N, VT);
16072 }
16073 
16074 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16075   SDValue N0 = N->getOperand(0);
16076 
16077   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16078     return N0;
16079 
16080   // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16081   // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16082   // example https://reviews.llvm.org/D136529#4120959.
16083   if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16084     return SDValue();
16085 
16086   // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16087   // Try to push freeze through instructions that propagate but don't produce
16088   // poison as far as possible. If an operand of freeze follows three
16089   // conditions: 1) one-use, 2) does not produce poison, and 3) has all but
16090   // one guaranteed-non-poison operand (or is a BUILD_VECTOR or similar), then
16091   // push the freeze through to the operands that are not guaranteed non-poison.
16092   // NOTE: we will strip poison-generating flags, so ignore them here.
16093   if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16094                                  /*ConsiderFlags*/ false) ||
16095       N0->getNumValues() != 1 || !N0->hasOneUse())
16096     return SDValue();
16097 
16098   bool AllowMultipleMaybePoisonOperands =
16099       N0.getOpcode() == ISD::SELECT_CC ||
16100       N0.getOpcode() == ISD::SETCC ||
16101       N0.getOpcode() == ISD::BUILD_VECTOR ||
16102       N0.getOpcode() == ISD::BUILD_PAIR ||
16103       N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
16104       N0.getOpcode() == ISD::CONCAT_VECTORS;
16105 
16106   // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16107   // ones" or "constant" into something that depends on FrozenUndef. We can
16108   // instead pick undef values to keep those properties, while at the same time
16109   // folding away the freeze.
16110   // If we implement a more general solution for folding away freeze(undef) in
16111   // the future, then this special handling can be removed.
16112   if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16113     SDLoc DL(N0);
16114     EVT VT = N0.getValueType();
16115     if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()) && VT.isInteger())
16116       return DAG.getAllOnesConstant(DL, VT);
16117     if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
16118       SmallVector<SDValue, 8> NewVecC;
16119       for (const SDValue &Op : N0->op_values())
16120         NewVecC.push_back(
16121             Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16122       return DAG.getBuildVector(VT, DL, NewVecC);
16123     }
16124   }
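  // E.g. (freeze (build_vector 1, 2, undef, 4)) can simply become
  // (build_vector 1, 2, 0, 4): choosing 0 for the undef lane keeps the
  // vector fully constant instead of depending on a frozen undef.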
16125 
16126   SmallSet<SDValue, 8> MaybePoisonOperands;
16127   SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16128   for (auto [OpNo, Op] : enumerate(N0->ops())) {
16129     if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
16130                                              /*Depth*/ 1))
16131       continue;
16132     bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16133     bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16134     if (IsNewMaybePoisonOperand)
16135       MaybePoisonOperandNumbers.push_back(OpNo);
16136     if (!HadMaybePoisonOperands)
16137       continue;
16138     if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16139       // Multiple maybe-poison ops when not allowed - bail out.
16140       return SDValue();
16141     }
16142   }
16143   // NOTE: the whole op may still not be guaranteed not to be undef or poison
16144   // because it could create undef or poison due to its poison-generating
16145   // flags. So not finding any maybe-poison operands is fine.
16146 
16147   for (unsigned OpNo : MaybePoisonOperandNumbers) {
16148     // N0 can mutate during iteration, so make sure to refetch the maybe poison
16149     // operands via the operand numbers. The typical scenario is that we have
16150     // something like this
16151     //   t262: i32 = freeze t181
16152     //   t150: i32 = ctlz_zero_undef t262
16153     //   t184: i32 = ctlz_zero_undef t181
16154     //   t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
16155     // When freezing the t181 operand we get t262 back, and then the
16156     // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
16157     // also recursively replace t184 by t150.
16158     SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16159     // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
16160     if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
16161       continue;
16162     // First, freeze each offending operand.
16163     SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
16164     // Then, change all other uses of unfrozen operand to use frozen operand.
16165     DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
16166     if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
16167         FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
16168       // But, that also updated the use in the freeze we just created, thus
16169       // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16170       DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
16171                              MaybePoisonOperand);
16172     }
16173   }
16174 
16175   // This node has been merged with another.
16176   if (N->getOpcode() == ISD::DELETED_NODE)
16177     return SDValue(N, 0);
16178 
16179   // The whole node may have been updated, so the value we were holding
16180   // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16181   N0 = N->getOperand(0);
16182 
16183   // Finally, recreate the node; its operands were updated to use
16184   // frozen operands, so we just need to use its "original" operands.
16185   SmallVector<SDValue> Ops(N0->ops());
16186   // Special-handle ISD::UNDEF: each single one of them can be its own thing.
16187   for (SDValue &Op : Ops) {
16188     if (Op.getOpcode() == ISD::UNDEF)
16189       Op = DAG.getFreeze(Op);
16190   }
16191 
16192   SDValue R;
16193   if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
16194     // Special case handling for ShuffleVectorSDNode nodes.
16195     R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
16196                              SVN->getMask());
16197   } else {
16198     // NOTE: this strips poison generating flags.
16199     R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
16200   }
16201   assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
16202          "Can't create node that may be undef/poison!");
16203   return R;
16204 }
16205 
16206 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
16207 /// operands. DstEltVT indicates the destination element value type.
16208 SDValue DAGCombiner::
16209 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
16210   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
16211 
16212   // If this is already the right type, we're done.
16213   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
16214 
16215   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
16216   unsigned DstBitSize = DstEltVT.getSizeInBits();
16217 
16218   // If this is a conversion of N elements of one type to N elements of another
16219   // type, convert each element.  This handles FP<->INT cases.
16220   if (SrcBitSize == DstBitSize) {
16221     SmallVector<SDValue, 8> Ops;
16222     for (SDValue Op : BV->op_values()) {
16223       // If the vector element type is not legal, the BUILD_VECTOR operands
16224       // are promoted and implicitly truncated.  Make that explicit here.
16225       if (Op.getValueType() != SrcEltVT)
16226         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
16227       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
16228       AddToWorklist(Ops.back().getNode());
16229     }
16230     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
16231                               BV->getValueType(0).getVectorNumElements());
16232     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
16233   }
16234 
16235   // Otherwise, we're growing or shrinking the elements.  To avoid having to
16236   // handle annoying details of growing/shrinking FP values, we convert them to
16237   // int first.
16238   if (SrcEltVT.isFloatingPoint()) {
16239     // Convert the input float vector to an int vector where the elements
16240     // are the same size.
16241     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
16242     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
16243     SrcEltVT = IntVT;
16244   }
16245 
16246   // Now we know the input is an integer vector.  If the output is a FP type,
16247   // convert to integer first, then to FP of the right size.
16248   if (DstEltVT.isFloatingPoint()) {
16249     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
16250     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
16251 
16252     // Next, convert to FP elements of the same size.
16253     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
16254   }
16255 
16256   // Okay, we know the src/dst types are both integers of differing types.
16257   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
16258 
16259   // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
16260   // BuildVectorSDNode?
16261   auto *BVN = cast<BuildVectorSDNode>(BV);
16262 
16263   // Extract the constant raw bit data.
16264   BitVector UndefElements;
16265   SmallVector<APInt> RawBits;
16266   bool IsLE = DAG.getDataLayout().isLittleEndian();
16267   if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
16268     return SDValue();
16269 
16270   SDLoc DL(BV);
16271   SmallVector<SDValue, 8> Ops;
16272   for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
16273     if (UndefElements[I])
16274       Ops.push_back(DAG.getUNDEF(DstEltVT));
16275     else
16276       Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
16277   }
16278 
16279   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
16280   return DAG.getBuildVector(VT, DL, Ops);
16281 }
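// Illustrative little-endian example: bitcasting the constant vector
// (v4i16 (build_vector 1, 2, 3, 4)) to v2i32 folds directly to
// (v2i32 (build_vector 0x00020001, 0x00040003)).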
16282 
16283 // Returns true if floating-point contraction is allowed on the FMUL-SDValue
16284 // `N`.
16285 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
16286   assert(N.getOpcode() == ISD::FMUL);
16287 
16288   return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
16289          N->getFlags().hasAllowContract();
16290 }
16291 
16292 // Returns true if `N` can assume no infinities involved in its computation.
16293 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
16294   return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
16295 }
16296 
16297 /// Try to perform FMA combining on a given FADD node.
16298 template <class MatchContextClass>
16299 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
16300   SDValue N0 = N->getOperand(0);
16301   SDValue N1 = N->getOperand(1);
16302   EVT VT = N->getValueType(0);
16303   SDLoc SL(N);
16304   MatchContextClass matcher(DAG, TLI, N);
16305   const TargetOptions &Options = DAG.getTarget().Options;
16306 
16307   bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16308 
16309   // Floating-point multiply-add with intermediate rounding.
16310   // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16311   // FIXME: Add VP_FMAD opcode.
16312   bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16313 
16314   // Floating-point multiply-add without intermediate rounding.
16315   bool HasFMA =
16316       (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16317       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16318 
16319   // No valid opcode, do not combine.
16320   if (!HasFMAD && !HasFMA)
16321     return SDValue();
16322 
16323   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16324                               Options.UnsafeFPMath || HasFMAD);
16325   // If the addition is not contractable, do not combine.
16326   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16327     return SDValue();
16328 
16329   // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
16330   // beneficial. It does not reduce latency. It increases register pressure. It
16331   // replaces an fadd with an fma which is a more complex instruction, so is
16332   // likely to have a larger encoding, use more functional units, etc.
16333   if (N0 == N1)
16334     return SDValue();
16335 
16336   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16337     return SDValue();
16338 
16339   // Always prefer FMAD to FMA for precision.
16340   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16341   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16342 
16343   auto isFusedOp = [&](SDValue N) {
16344     return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16345   };
16346 
16347   // Is the node an FMUL and contractable either due to global flags or
16348   // SDNodeFlags.
16349   auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16350     if (!matcher.match(N, ISD::FMUL))
16351       return false;
16352     return AllowFusionGlobally || N->getFlags().hasAllowContract();
16353   };
16354   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
16355   // prefer to fold the multiply with fewer uses.
16356   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
16357     if (N0->use_size() > N1->use_size())
16358       std::swap(N0, N1);
16359   }
16360 
16361   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
16362   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
16363     return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
16364                            N0.getOperand(1), N1);
16365   }
16366 
16367   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
16368   // Note: Commutes FADD operands.
16369   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
16370     return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
16371                            N1.getOperand(1), N0);
16372   }
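  // E.g. (fadd z, (fmul x, y)) contracts to (fma x, y, z) here, assuming the
  // fmul is contractable and either aggressive fusion is enabled or the fmul
  // has no other uses.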
16373 
16374   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
16375   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
16376   // This also works with nested fma instructions:
16377   // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
16378   //   fma A, B, (fma C, D, (fma E, F, G))
16379   // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
16380   //   fma A, B, (fma C, D, (fma E, F, G)).
16381   // This requires reassociation because it changes the order of operations.
16382   bool CanReassociate =
16383       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16384   if (CanReassociate) {
16385     SDValue FMA, E;
16386     if (isFusedOp(N0) && N0.hasOneUse()) {
16387       FMA = N0;
16388       E = N1;
16389     } else if (isFusedOp(N1) && N1.hasOneUse()) {
16390       FMA = N1;
16391       E = N0;
16392     }
16393 
16394     SDValue TmpFMA = FMA;
16395     while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
16396       SDValue FMul = TmpFMA->getOperand(2);
16397       if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
16398         SDValue C = FMul.getOperand(0);
16399         SDValue D = FMul.getOperand(1);
16400         SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
16401         DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
16402         // Replacing the inner FMul could cause the outer FMA to be simplified
16403         // away.
16404         return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
16405       }
16406 
16407       TmpFMA = TmpFMA->getOperand(2);
16408     }
16409   }
16410 
16411   // Look through FP_EXTEND nodes to do more combining.
16412 
16413   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
16414   if (matcher.match(N0, ISD::FP_EXTEND)) {
16415     SDValue N00 = N0.getOperand(0);
16416     if (isContractableFMUL(N00) &&
16417         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16418                             N00.getValueType())) {
16419       return matcher.getNode(
16420           PreferredFusedOpcode, SL, VT,
16421           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16422           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
16423     }
16424   }
16425 
16426   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
16427   // Note: Commutes FADD operands.
16428   if (matcher.match(N1, ISD::FP_EXTEND)) {
16429     SDValue N10 = N1.getOperand(0);
16430     if (isContractableFMUL(N10) &&
16431         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16432                             N10.getValueType())) {
16433       return matcher.getNode(
16434           PreferredFusedOpcode, SL, VT,
16435           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
16436           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16437     }
16438   }
16439 
16440   // More folding opportunities when target permits.
16441   if (Aggressive) {
16442     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
16443     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
16444     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16445                                     SDValue Z) {
16446       return matcher.getNode(
16447           PreferredFusedOpcode, SL, VT, X, Y,
16448           matcher.getNode(PreferredFusedOpcode, SL, VT,
16449                           matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16450                           matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16451     };
16452     if (isFusedOp(N0)) {
16453       SDValue N02 = N0.getOperand(2);
16454       if (matcher.match(N02, ISD::FP_EXTEND)) {
16455         SDValue N020 = N02.getOperand(0);
16456         if (isContractableFMUL(N020) &&
16457             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16458                                 N020.getValueType())) {
16459           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
16460                                       N020.getOperand(0), N020.getOperand(1),
16461                                       N1);
16462         }
16463       }
16464     }
16465 
16466     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
16467     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16468     // FIXME: This turns two single-precision and one double-precision
16469     // operation into two double-precision operations, which might not be
16470     // interesting for all targets, especially GPUs.
16471     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16472                                     SDValue Z) {
16473       return matcher.getNode(
16474           PreferredFusedOpcode, SL, VT,
16475           matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
16476           matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
16477           matcher.getNode(PreferredFusedOpcode, SL, VT,
16478                           matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16479                           matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16480     };
16481     if (N0.getOpcode() == ISD::FP_EXTEND) {
16482       SDValue N00 = N0.getOperand(0);
16483       if (isFusedOp(N00)) {
16484         SDValue N002 = N00.getOperand(2);
16485         if (isContractableFMUL(N002) &&
16486             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16487                                 N00.getValueType())) {
16488           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
16489                                       N002.getOperand(0), N002.getOperand(1),
16490                                       N1);
16491         }
16492       }
16493     }
16494 
16495     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
16496     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
16497     if (isFusedOp(N1)) {
16498       SDValue N12 = N1.getOperand(2);
16499       if (N12.getOpcode() == ISD::FP_EXTEND) {
16500         SDValue N120 = N12.getOperand(0);
16501         if (isContractableFMUL(N120) &&
16502             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16503                                 N120.getValueType())) {
16504           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
16505                                       N120.getOperand(0), N120.getOperand(1),
16506                                       N0);
16507         }
16508       }
16509     }
16510 
16511     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
16512     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16513     // FIXME: This turns two single-precision and one double-precision
16514     // operation into two double-precision operations, which might not be
16515     // interesting for all targets, especially GPUs.
16516     if (N1.getOpcode() == ISD::FP_EXTEND) {
16517       SDValue N10 = N1.getOperand(0);
16518       if (isFusedOp(N10)) {
16519         SDValue N102 = N10.getOperand(2);
16520         if (isContractableFMUL(N102) &&
16521             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16522                                 N10.getValueType())) {
16523           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
16524                                       N102.getOperand(0), N102.getOperand(1),
16525                                       N0);
16526         }
16527       }
16528     }
16529   }
16530 
16531   return SDValue();
16532 }
16533 
16534 /// Try to perform FMA combining on a given FSUB node.
16535 template <class MatchContextClass>
16536 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16537   SDValue N0 = N->getOperand(0);
16538   SDValue N1 = N->getOperand(1);
16539   EVT VT = N->getValueType(0);
16540   SDLoc SL(N);
16541   MatchContextClass matcher(DAG, TLI, N);
16542   const TargetOptions &Options = DAG.getTarget().Options;
16543 
16544   bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16545 
16546   // Floating-point multiply-add with intermediate rounding.
16547   // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16548   // FIXME: Add VP_FMAD opcode.
16549   bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16550 
16551   // Floating-point multiply-add without intermediate rounding.
16552   bool HasFMA =
16553       (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16554       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16555 
16556   // No valid opcode, do not combine.
16557   if (!HasFMAD && !HasFMA)
16558     return SDValue();
16559 
16560   const SDNodeFlags Flags = N->getFlags();
16561   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16562                               Options.UnsafeFPMath || HasFMAD);
16563 
16564   // If the subtraction is not contractable, do not combine.
16565   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16566     return SDValue();
16567 
16568   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16569     return SDValue();
16570 
16571   // Always prefer FMAD to FMA for precision.
16572   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16573   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16574   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16575 
16576   // Is the node an FMUL and contractable either due to global flags or
16577   // SDNodeFlags.
16578   auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16579     if (!matcher.match(N, ISD::FMUL))
16580       return false;
16581     return AllowFusionGlobally || N->getFlags().hasAllowContract();
16582   };
16583 
16584   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16585   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16586     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16587       return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16588                              XY.getOperand(1),
16589                              matcher.getNode(ISD::FNEG, SL, VT, Z));
16590     }
16591     return SDValue();
16592   };
16593 
16594   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16595   // Note: Commutes FSUB operands.
16596   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16597     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16598       return matcher.getNode(
16599           PreferredFusedOpcode, SL, VT,
16600           matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16601           YZ.getOperand(1), X);
16602     }
16603     return SDValue();
16604   };
16605 
16606   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16607   // prefer to fold the multiply with fewer uses.
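        // (The multiply with fewer uses is the one more likely to become dead
        // once it has been absorbed into the fused operation.)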
16608   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16609       (N0->use_size() > N1->use_size())) {
16610     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16611     if (SDValue V = tryToFoldXSubYZ(N0, N1))
16612       return V;
16613     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16614     if (SDValue V = tryToFoldXYSubZ(N0, N1))
16615       return V;
16616   } else {
16617     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16618     if (SDValue V = tryToFoldXYSubZ(N0, N1))
16619       return V;
16620     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16621     if (SDValue V = tryToFoldXSubYZ(N0, N1))
16622       return V;
16623   }
16624 
16625   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
16626   if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16627       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16628     SDValue N00 = N0.getOperand(0).getOperand(0);
16629     SDValue N01 = N0.getOperand(0).getOperand(1);
16630     return matcher.getNode(PreferredFusedOpcode, SL, VT,
16631                            matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16632                            matcher.getNode(ISD::FNEG, SL, VT, N1));
16633   }
16634 
16635   // Look through FP_EXTEND nodes to do more combining.
16636 
16637   // fold (fsub (fpext (fmul x, y)), z)
16638   //   -> (fma (fpext x), (fpext y), (fneg z))
16639   if (matcher.match(N0, ISD::FP_EXTEND)) {
16640     SDValue N00 = N0.getOperand(0);
16641     if (isContractableFMUL(N00) &&
16642         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16643                             N00.getValueType())) {
16644       return matcher.getNode(
16645           PreferredFusedOpcode, SL, VT,
16646           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16647           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16648           matcher.getNode(ISD::FNEG, SL, VT, N1));
16649     }
16650   }
16651 
16652   // fold (fsub x, (fpext (fmul y, z)))
16653   //   -> (fma (fneg (fpext y)), (fpext z), x)
16654   // Note: Commutes FSUB operands.
16655   if (matcher.match(N1, ISD::FP_EXTEND)) {
16656     SDValue N10 = N1.getOperand(0);
16657     if (isContractableFMUL(N10) &&
16658         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16659                             N10.getValueType())) {
16660       return matcher.getNode(
16661           PreferredFusedOpcode, SL, VT,
16662           matcher.getNode(
16663               ISD::FNEG, SL, VT,
16664               matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16665           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16666     }
16667   }
16668 
16669   // fold (fsub (fpext (fneg (fmul x, y))), z)
16670   //   -> (fneg (fma (fpext x), (fpext y), z))
16671   // Note: This could be removed with appropriate canonicalization of the
16672   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However,
16673   // the orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math
16674   // prevent us from implementing the canonicalization in visitFSUB.
16675   if (matcher.match(N0, ISD::FP_EXTEND)) {
16676     SDValue N00 = N0.getOperand(0);
16677     if (matcher.match(N00, ISD::FNEG)) {
16678       SDValue N000 = N00.getOperand(0);
16679       if (isContractableFMUL(N000) &&
16680           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16681                               N00.getValueType())) {
16682         return matcher.getNode(
16683             ISD::FNEG, SL, VT,
16684             matcher.getNode(
16685                 PreferredFusedOpcode, SL, VT,
16686                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16687                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16688                 N1));
16689       }
16690     }
16691   }
16692 
16693   // fold (fsub (fneg (fpext (fmul x, y))), z)
16694   //   -> (fneg (fma (fpext x), (fpext y), z))
16695   // Note: This could be removed with appropriate canonicalization of the
16696   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However,
16697   // the orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math
16698   // prevent us from implementing the canonicalization in visitFSUB.
16699   if (matcher.match(N0, ISD::FNEG)) {
16700     SDValue N00 = N0.getOperand(0);
16701     if (matcher.match(N00, ISD::FP_EXTEND)) {
16702       SDValue N000 = N00.getOperand(0);
16703       if (isContractableFMUL(N000) &&
16704           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16705                               N000.getValueType())) {
16706         return matcher.getNode(
16707             ISD::FNEG, SL, VT,
16708             matcher.getNode(
16709                 PreferredFusedOpcode, SL, VT,
16710                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16711                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16712                 N1));
16713       }
16714     }
16715   }
16716 
16717   auto isReassociable = [&Options](SDNode *N) {
16718     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16719   };
16720 
16721   auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16722                                             &isReassociable](SDValue N) {
16723     return isContractableFMUL(N) && isReassociable(N.getNode());
16724   };
16725 
16726   auto isFusedOp = [&](SDValue N) {
16727     return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16728   };
16729 
16730   // More folding opportunities when target permits.
16731   if (Aggressive && isReassociable(N)) {
16732     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16733     // fold (fsub (fma x, y, (fmul u, v)), z)
16734     //   -> (fma x, y, (fma u, v, (fneg z)))
16735     if (CanFuse && isFusedOp(N0) &&
16736         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16737         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16738       return matcher.getNode(
16739           PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16740           matcher.getNode(PreferredFusedOpcode, SL, VT,
16741                           N0.getOperand(2).getOperand(0),
16742                           N0.getOperand(2).getOperand(1),
16743                           matcher.getNode(ISD::FNEG, SL, VT, N1)));
16744     }
16745 
16746     // fold (fsub x, (fma y, z, (fmul u, v)))
16747     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
16748     if (CanFuse && isFusedOp(N1) &&
16749         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16750         N1->hasOneUse() && NoSignedZero) {
16751       SDValue N20 = N1.getOperand(2).getOperand(0);
16752       SDValue N21 = N1.getOperand(2).getOperand(1);
16753       return matcher.getNode(
16754           PreferredFusedOpcode, SL, VT,
16755           matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16756           N1.getOperand(1),
16757           matcher.getNode(PreferredFusedOpcode, SL, VT,
16758                           matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16759     }
16760 
16761     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16762     //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16763     if (isFusedOp(N0) && N0->hasOneUse()) {
16764       SDValue N02 = N0.getOperand(2);
16765       if (matcher.match(N02, ISD::FP_EXTEND)) {
16766         SDValue N020 = N02.getOperand(0);
16767         if (isContractableAndReassociableFMUL(N020) &&
16768             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16769                                 N020.getValueType())) {
16770           return matcher.getNode(
16771               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16772               matcher.getNode(
16773                   PreferredFusedOpcode, SL, VT,
16774                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16775                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16776                   matcher.getNode(ISD::FNEG, SL, VT, N1)));
16777         }
16778       }
16779     }
16780 
16781     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16782     //   -> (fma (fpext x), (fpext y),
16783     //           (fma (fpext u), (fpext v), (fneg z)))
16784     // FIXME: This turns two single-precision and one double-precision
16785     // operation into two double-precision operations, which might not be
16786     // interesting for all targets, especially GPUs.
16787     if (matcher.match(N0, ISD::FP_EXTEND)) {
16788       SDValue N00 = N0.getOperand(0);
16789       if (isFusedOp(N00)) {
16790         SDValue N002 = N00.getOperand(2);
16791         if (isContractableAndReassociableFMUL(N002) &&
16792             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16793                                 N00.getValueType())) {
16794           return matcher.getNode(
16795               PreferredFusedOpcode, SL, VT,
16796               matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16797               matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16798               matcher.getNode(
16799                   PreferredFusedOpcode, SL, VT,
16800                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16801                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16802                   matcher.getNode(ISD::FNEG, SL, VT, N1)));
16803         }
16804       }
16805     }
16806 
16807     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16808     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16809     if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16810         N1->hasOneUse()) {
16811       SDValue N120 = N1.getOperand(2).getOperand(0);
16812       if (isContractableAndReassociableFMUL(N120) &&
16813           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16814                               N120.getValueType())) {
16815         SDValue N1200 = N120.getOperand(0);
16816         SDValue N1201 = N120.getOperand(1);
16817         return matcher.getNode(
16818             PreferredFusedOpcode, SL, VT,
16819             matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16820             N1.getOperand(1),
16821             matcher.getNode(
16822                 PreferredFusedOpcode, SL, VT,
16823                 matcher.getNode(ISD::FNEG, SL, VT,
16824                                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16825                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16826       }
16827     }
16828 
16829     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16830     //   -> (fma (fneg (fpext y)), (fpext z),
16831     //           (fma (fneg (fpext u)), (fpext v), x))
16832     // FIXME: This turns two single-precision and one double-precision
16833     // operation into two double-precision operations, which might not be
16834     // interesting for all targets, especially GPUs.
16835     if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16836       SDValue CvtSrc = N1.getOperand(0);
16837       SDValue N100 = CvtSrc.getOperand(0);
16838       SDValue N101 = CvtSrc.getOperand(1);
16839       SDValue N102 = CvtSrc.getOperand(2);
16840       if (isContractableAndReassociableFMUL(N102) &&
16841           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16842                               CvtSrc.getValueType())) {
16843         SDValue N1020 = N102.getOperand(0);
16844         SDValue N1021 = N102.getOperand(1);
16845         return matcher.getNode(
16846             PreferredFusedOpcode, SL, VT,
16847             matcher.getNode(ISD::FNEG, SL, VT,
16848                             matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16849             matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16850             matcher.getNode(
16851                 PreferredFusedOpcode, SL, VT,
16852                 matcher.getNode(ISD::FNEG, SL, VT,
16853                                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16854                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16855       }
16856     }
16857   }
16858 
16859   return SDValue();
16860 }
16861 
16862 /// Try to perform FMA combining on a given FMUL node based on the distributive
16863 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16864 /// subtraction instead of addition).
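      /// For example, (fmul (fadd x, 1.0), y) folds to (fma x, y, y).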
16865 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16866   SDValue N0 = N->getOperand(0);
16867   SDValue N1 = N->getOperand(1);
16868   EVT VT = N->getValueType(0);
16869   SDLoc SL(N);
16870 
16871   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16872 
16873   const TargetOptions &Options = DAG.getTarget().Options;
16874 
16875   // The transforms below are incorrect when x == 0 and y == inf, because the
16876   // intermediate multiplication produces a nan.
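        // For example, (fmul (fadd 0.0, 1.0), inf) evaluates to inf, but the
        // fused (fma 0.0, inf, inf) evaluates to nan because of the 0.0 * inf
        // product.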
16877   SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16878   if (!hasNoInfs(Options, FAdd))
16879     return SDValue();
16880 
16881   // Floating-point multiply-add without intermediate rounding.
16882   bool HasFMA =
16883       isContractableFMUL(Options, SDValue(N, 0)) &&
16884       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16885       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16886 
16887   // Floating-point multiply-add with intermediate rounding. This can result
16888   // in a less precise result due to the changed rounding order.
16889   bool HasFMAD = Options.UnsafeFPMath &&
16890                  (LegalOperations && TLI.isFMADLegal(DAG, N));
16891 
16892   // No valid opcode, do not combine.
16893   if (!HasFMAD && !HasFMA)
16894     return SDValue();
16895 
16896   // Always prefer FMAD to FMA for precision.
16897   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16898   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16899 
16900   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16901   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16902   auto FuseFADD = [&](SDValue X, SDValue Y) {
16903     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16904       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16905         if (C->isExactlyValue(+1.0))
16906           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16907                              Y);
16908         if (C->isExactlyValue(-1.0))
16909           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16910                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16911       }
16912     }
16913     return SDValue();
16914   };
16915 
16916   if (SDValue FMA = FuseFADD(N0, N1))
16917     return FMA;
16918   if (SDValue FMA = FuseFADD(N1, N0))
16919     return FMA;
16920 
16921   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16922   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16923   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16924   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16925   auto FuseFSUB = [&](SDValue X, SDValue Y) {
16926     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16927       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16928         if (C0->isExactlyValue(+1.0))
16929           return DAG.getNode(PreferredFusedOpcode, SL, VT,
16930                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16931                              Y);
16932         if (C0->isExactlyValue(-1.0))
16933           return DAG.getNode(PreferredFusedOpcode, SL, VT,
16934                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16935                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16936       }
16937       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16938         if (C1->isExactlyValue(+1.0))
16939           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16940                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16941         if (C1->isExactlyValue(-1.0))
16942           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16943                              Y);
16944       }
16945     }
16946     return SDValue();
16947   };
16948 
16949   if (SDValue FMA = FuseFSUB(N0, N1))
16950     return FMA;
16951   if (SDValue FMA = FuseFSUB(N1, N0))
16952     return FMA;
16953 
16954   return SDValue();
16955 }
16956 
16957 SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16958   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16959 
16960   // FADD -> FMA combines:
16961   if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16962     if (Fused.getOpcode() != ISD::DELETED_NODE)
16963       AddToWorklist(Fused.getNode());
16964     return Fused;
16965   }
16966   return SDValue();
16967 }
16968 
16969 SDValue DAGCombiner::visitFADD(SDNode *N) {
16970   SDValue N0 = N->getOperand(0);
16971   SDValue N1 = N->getOperand(1);
16972   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16973   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16974   EVT VT = N->getValueType(0);
16975   SDLoc DL(N);
16976   const TargetOptions &Options = DAG.getTarget().Options;
16977   SDNodeFlags Flags = N->getFlags();
16978   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16979 
16980   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16981     return R;
16982 
16983   // fold (fadd c1, c2) -> c1 + c2
16984   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16985     return C;
16986 
16987   // canonicalize constant to RHS
16988   if (N0CFP && !N1CFP)
16989     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16990 
16991   // fold vector ops
16992   if (VT.isVector())
16993     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16994       return FoldedVOp;
16995 
16996   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16997   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16998   if (N1C && N1C->isZero())
16999     if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
              Flags.hasNoSignedZeros())
17000       return N0;
17001 
17002   if (SDValue NewSel = foldBinOpIntoSelect(N))
17003     return NewSel;
17004 
17005   // fold (fadd A, (fneg B)) -> (fsub A, B)
17006   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17007     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17008             N1, DAG, LegalOperations, ForCodeSize))
17009       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17010 
17011   // fold (fadd (fneg A), B) -> (fsub B, A)
17012   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17013     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17014             N0, DAG, LegalOperations, ForCodeSize))
17015       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17016 
17017   auto isFMulNegTwo = [](SDValue FMul) {
17018     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17019       return false;
17020     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17021     return C && C->isExactlyValue(-2.0);
17022   };
17023 
17024   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17025   if (isFMulNegTwo(N0)) {
17026     SDValue B = N0.getOperand(0);
17027     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17028     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17029   }
17030   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17031   if (isFMulNegTwo(N1)) {
17032     SDValue B = N1.getOperand(0);
17033     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17034     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17035   }
17036 
17037   // No FP constant should be created after legalization, as the Instruction
17038   // Selection pass has a hard time dealing with FP constants.
17039   bool AllowNewConst = (Level < AfterLegalizeDAG);
17040 
17041   // If nnan is enabled, fold lots of things.
17042   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17043     // If allowed, fold (fadd (fneg x), x) -> 0.0
17044     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17045       return DAG.getConstantFP(0.0, DL, VT);
17046 
17047     // If allowed, fold (fadd x, (fneg x)) -> 0.0
17048     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17049       return DAG.getConstantFP(0.0, DL, VT);
17050   }
17051 
17052   // If 'unsafe math' or reassoc and nsz, fold lots of things.
17053   // TODO: break out portions of the transformations below for which Unsafe is
17054   //       considered and which do not require both nsz and reassoc
17055   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17056        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17057       AllowNewConst) {
17058     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17059     if (N1CFP && N0.getOpcode() == ISD::FADD &&
17060         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17061       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17062       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17063     }
17064 
17065     // We can fold chains of FADD's of the same value into multiplications.
17066     // This transform is not safe in general because we are reducing the number
17067     // of rounding steps.
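          // For example, folding (fadd (fmul x, c), x) to (fmul x, c+1)
          // replaces the round of x*c followed by the round of the add with a
          // single rounded multiply, so the results can differ in the last ulp.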
17068     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17069       if (N0.getOpcode() == ISD::FMUL) {
17070         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17071         bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17072 
17073         // (fadd (fmul x, c), x) -> (fmul x, c+1)
17074         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17075           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17076                                        DAG.getConstantFP(1.0, DL, VT));
17077           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17078         }
17079 
17080         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17081         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17082             N1.getOperand(0) == N1.getOperand(1) &&
17083             N0.getOperand(0) == N1.getOperand(0)) {
17084           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17085                                        DAG.getConstantFP(2.0, DL, VT));
17086           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17087         }
17088       }
17089 
17090       if (N1.getOpcode() == ISD::FMUL) {
17091         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17092         bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17093 
17094         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17095         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17096           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17097                                        DAG.getConstantFP(1.0, DL, VT));
17098           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17099         }
17100 
17101         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17102         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17103             N0.getOperand(0) == N0.getOperand(1) &&
17104             N1.getOperand(0) == N0.getOperand(0)) {
17105           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17106                                        DAG.getConstantFP(2.0, DL, VT));
17107           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17108         }
17109       }
17110 
17111       if (N0.getOpcode() == ISD::FADD) {
17112         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17113         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17114         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17115             (N0.getOperand(0) == N1)) {
17116           return DAG.getNode(ISD::FMUL, DL, VT, N1,
17117                              DAG.getConstantFP(3.0, DL, VT));
17118         }
17119       }
17120 
17121       if (N1.getOpcode() == ISD::FADD) {
17122         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17123         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17124         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17125             N1.getOperand(0) == N0) {
17126           return DAG.getNode(ISD::FMUL, DL, VT, N0,
17127                              DAG.getConstantFP(3.0, DL, VT));
17128         }
17129       }
17130 
17131       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17132       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17133           N0.getOperand(0) == N0.getOperand(1) &&
17134           N1.getOperand(0) == N1.getOperand(1) &&
17135           N0.getOperand(0) == N1.getOperand(0)) {
17136         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17137                            DAG.getConstantFP(4.0, DL, VT));
17138       }
17139     }
17140 
17141     // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17142     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17143                                           VT, N0, N1, Flags))
17144       return SD;
17145   } // enable-unsafe-fp-math
17146 
17147   // FADD -> FMA combines:
17148   if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17149     if (Fused.getOpcode() != ISD::DELETED_NODE)
17150       AddToWorklist(Fused.getNode());
17151     return Fused;
17152   }
17153   return SDValue();
17154 }
17155 
17156 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17157   SDValue Chain = N->getOperand(0);
17158   SDValue N0 = N->getOperand(1);
17159   SDValue N1 = N->getOperand(2);
17160   EVT VT = N->getValueType(0);
17161   EVT ChainVT = N->getValueType(1);
17162   SDLoc DL(N);
17163   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17164 
17165   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17166   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17167     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17168             N1, DAG, LegalOperations, ForCodeSize)) {
17169       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17170                          {Chain, N0, NegN1});
17171     }
17172 
17173   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17174   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17175     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17176             N0, DAG, LegalOperations, ForCodeSize)) {
17177       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17178                          {Chain, N1, NegN0});
17179     }
17180   return SDValue();
17181 }
17182 
17183 SDValue DAGCombiner::visitFSUB(SDNode *N) {
17184   SDValue N0 = N->getOperand(0);
17185   SDValue N1 = N->getOperand(1);
17186   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17187   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17188   EVT VT = N->getValueType(0);
17189   SDLoc DL(N);
17190   const TargetOptions &Options = DAG.getTarget().Options;
17191   const SDNodeFlags Flags = N->getFlags();
17192   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17193 
17194   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17195     return R;
17196 
17197   // fold (fsub c1, c2) -> c1-c2
17198   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17199     return C;
17200 
17201   // fold vector ops
17202   if (VT.isVector())
17203     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17204       return FoldedVOp;
17205 
17206   if (SDValue NewSel = foldBinOpIntoSelect(N))
17207     return NewSel;
17208 
17209   // (fsub A, +0.0) -> A (also allowed with -0.0 and fast-math)
17210   if (N1CFP && N1CFP->isZero()) {
17211     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17212         Flags.hasNoSignedZeros()) {
17213       return N0;
17214     }
17215   }
17216 
17217   if (N0 == N1) {
17218     // (fsub x, x) -> 0.0
17219     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
17220       return DAG.getConstantFP(0.0f, DL, VT);
17221   }
17222 
17223   // (fsub -0.0, N1) -> -N1 (also allowed with +0.0 and fast-math)
17224   if (N0CFP && N0CFP->isZero()) {
17225     if (N0CFP->isNegative() ||
17226         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17227       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17228       // flushed to zero, unless all users treat denorms as zero (DAZ).
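            // For example, when subnormal outputs are flushed,
            // (fsub 0.0, <subnormal>) yields +-0.0, whereas FNEG merely flips
            // the sign bit and would return the negated subnormal.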
17229       // FIXME: This transform will change the sign of a NaN and the behavior
17230       // of a signaling NaN. It is only valid when a NoNaN flag is present.
17231       DenormalMode DenormMode = DAG.getDenormalMode(VT);
17232       if (DenormMode == DenormalMode::getIEEE()) {
17233         if (SDValue NegN1 =
17234                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17235           return NegN1;
17236         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17237           return DAG.getNode(ISD::FNEG, DL, VT, N1);
17238       }
17239     }
17240   }
17241 
17242   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17243        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17244       N1.getOpcode() == ISD::FADD) {
17245     // X - (X + Y) -> -Y
17246     if (N0 == N1->getOperand(0))
17247       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17248     // X - (Y + X) -> -Y
17249     if (N0 == N1->getOperand(1))
17250       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17251   }
17252 
17253   // fold (fsub A, (fneg B)) -> (fadd A, B)
17254   if (SDValue NegN1 =
17255           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17256     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
17257 
17258   // FSUB -> FMA combines:
17259   if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
17260     AddToWorklist(Fused.getNode());
17261     return Fused;
17262   }
17263 
17264   return SDValue();
17265 }
17266 
17267 // Transform IEEE Floats:
17268 //      (fmul C, (uitofp Pow2))
17269 //          -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17270 //      (fdiv C, (uitofp Pow2))
17271 //          -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
17272 //
17273 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
17274 // so there is no need for more than an add/sub.
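      //
      // For instance (f64, 52-bit mantissa), with C = 8.0, whose bits are
      // 0x4020000000000000, and Pow2 = 4, Log2(Pow2) = 2, so the multiply becomes
      //   (fmul 8.0, (uitofp 4))
      //     -> bitcast (add 0x4020000000000000, (shl 2, 52))
      //      = bitcast 0x4040000000000000 = 32.0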
17275 //
17276 // This is valid under the following circumstances:
17277 // 1) We are dealing with IEEE floats
17278 // 2) C is normal
17279 // 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
17280 // TODO: Much of this could also be used for generating `ldexp` on targets
17281 // that prefer it.
17282 SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
17283   EVT VT = N->getValueType(0);
17284   SDValue ConstOp, Pow2Op;
17285 
17286   std::optional<int> Mantissa;
17287   auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
17288     if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
17289       return false;
17290 
17291     ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
17292     Pow2Op = N->getOperand(1 - ConstOpIdx);
17293     if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
17294         (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
17295          !DAG.computeKnownBits(Pow2Op).isNonNegative()))
17296       return false;
17297 
17298     Pow2Op = Pow2Op.getOperand(0);
17299 
17300     // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
17301     // TODO: We could use knownbits to make this bound more precise.
17302     int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
17303 
17304     auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
17305       if (CFP == nullptr)
17306         return false;
17307 
17308       const APFloat &APF = CFP->getValueAPF();
17309 
17310       // Make sure we have a normal IEEE constant.
17311       if (!APF.isNormal() || !APF.isIEEE())
17312         return false;
17313 
17314       // Make sure the float's exponent is within the bounds for which this
17315       // transform produces a bitwise-equal value.
17316       int CurExp = ilogb(APF);
17317       // FMul by pow2 will only increase exponent.
17318       int MinExp =
17319           N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17320       // FDiv by pow2 will only decrease exponent.
17321       int MaxExp =
17322           N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17323       if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
17324           MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
17325         return false;
17326 
17327       // Finally make sure we actually know the mantissa for the float type.
17328       int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
17329       if (!Mantissa)
17330         Mantissa = ThisMantissa;
17331 
17332       return *Mantissa == ThisMantissa && ThisMantissa > 0;
17333     };
17334 
17335     // TODO: We may be able to include undefs.
17336     return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
17337   };
17338 
17339   if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
17340     return SDValue();
17341 
17342   if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
17343     return SDValue();
17344 
17345   // Get log2 after all other checks have taken place. This is because
17346   // BuildLogBase2 may create a new node.
17347   SDLoc DL(N);
17348   // Get Log2 type with same bitwidth as the float type (VT).
17349   EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
17350   if (VT.isVector())
17351     NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
17352                                 VT.getVectorElementCount());
17353 
17354   SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
17355                                /*InexpensiveOnly*/ true, NewIntVT);
17356   if (!Log2)
17357     return SDValue();
17358 
17359   // Perform actual transform.
17360   SDValue MantissaShiftCnt =
17361       DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
17362   // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should
17363   // fold to `(X << C1) + (C << C1)`, but that isn't always the case because
17364   // of the cast. We could implement that here by handling the casts.
17365   SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
17366   SDValue ResAsInt =
17367       DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
17368                   NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
17369   SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
17370   return ResAsFP;
17371 }
17372 
17373 SDValue DAGCombiner::visitFMUL(SDNode *N) {
17374   SDValue N0 = N->getOperand(0);
17375   SDValue N1 = N->getOperand(1);
17376   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17377   EVT VT = N->getValueType(0);
17378   SDLoc DL(N);
17379   const TargetOptions &Options = DAG.getTarget().Options;
17380   const SDNodeFlags Flags = N->getFlags();
17381   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17382 
17383   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17384     return R;
17385 
17386   // fold (fmul c1, c2) -> c1*c2
17387   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
17388     return C;
17389 
17390   // canonicalize constant to RHS
17391   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17392      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17393     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
17394 
17395   // fold vector ops
17396   if (VT.isVector())
17397     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17398       return FoldedVOp;
17399 
17400   if (SDValue NewSel = foldBinOpIntoSelect(N))
17401     return NewSel;
17402 
17403   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
17404     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
17405     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17406         N0.getOpcode() == ISD::FMUL) {
17407       SDValue N00 = N0.getOperand(0);
17408       SDValue N01 = N0.getOperand(1);
17409       // Avoid an infinite loop by making sure that N00 is not a constant
17410       // (the inner multiply has not been constant folded yet).
17411       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
17412           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
17413         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
17414         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
17415       }
17416     }
17417 
17418     // Match a special case: we convert X * 2.0 into fadd.
17419     // fmul (fadd X, X), C -> fmul X, 2.0 * C
17420     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
17421         N0.getOperand(0) == N0.getOperand(1)) {
17422       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
17423       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
17424       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
17425     }
17426 
17427     // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
17428     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
17429                                           VT, N0, N1, Flags))
17430       return SD;
17431   }
17432 
17433   // fold (fmul X, 2.0) -> (fadd X, X)
17434   if (N1CFP && N1CFP->isExactlyValue(+2.0))
17435     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
17436 
17437   // fold (fmul X, -1.0) -> (fsub -0.0, X)
17438   if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
17439     if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
17440       return DAG.getNode(ISD::FSUB, DL, VT,
17441                          DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
17442     }
17443   }
17444 
17445   // -N0 * -N1 --> N0 * N1
17446   TargetLowering::NegatibleCost CostN0 =
17447       TargetLowering::NegatibleCost::Expensive;
17448   TargetLowering::NegatibleCost CostN1 =
17449       TargetLowering::NegatibleCost::Expensive;
17450   SDValue NegN0 =
17451       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17452   if (NegN0) {
17453     HandleSDNode NegN0Handle(NegN0);
17454     SDValue NegN1 =
17455         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17456     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17457                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
17458       return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
17459   }
17460 
17461   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17462   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
17463   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
17464       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
17465       TLI.isOperationLegal(ISD::FABS, VT)) {
17466     SDValue Select = N0, X = N1;
17467     if (Select.getOpcode() != ISD::SELECT)
17468       std::swap(Select, X);
17469 
17470     SDValue Cond = Select.getOperand(0);
17471     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
17472     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
17473 
17474     if (TrueOpnd && FalseOpnd &&
17475         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
17476         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
17477         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17478       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17479       switch (CC) {
17480       default: break;
17481       case ISD::SETOLT:
17482       case ISD::SETULT:
17483       case ISD::SETOLE:
17484       case ISD::SETULE:
17485       case ISD::SETLT:
17486       case ISD::SETLE:
17487         std::swap(TrueOpnd, FalseOpnd);
17488         [[fallthrough]];
17489       case ISD::SETOGT:
17490       case ISD::SETUGT:
17491       case ISD::SETOGE:
17492       case ISD::SETUGE:
17493       case ISD::SETGT:
17494       case ISD::SETGE:
17495         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17496             TLI.isOperationLegal(ISD::FNEG, VT))
17497           return DAG.getNode(ISD::FNEG, DL, VT,
17498                    DAG.getNode(ISD::FABS, DL, VT, X));
17499         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17500           return DAG.getNode(ISD::FABS, DL, VT, X);
17501 
17502         break;
17503       }
17504     }
17505   }
17506 
17507   // FMUL -> FMA combines:
17508   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
17509     AddToWorklist(Fused.getNode());
17510     return Fused;
17511   }
17512 
17513   // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17514   // able to run.
17515   if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17516     return R;
17517 
17518   return SDValue();
17519 }
17520 
17521 template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
17522   SDValue N0 = N->getOperand(0);
17523   SDValue N1 = N->getOperand(1);
17524   SDValue N2 = N->getOperand(2);
17525   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
17526   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
17527   EVT VT = N->getValueType(0);
17528   SDLoc DL(N);
17529   const TargetOptions &Options = DAG.getTarget().Options;
17530   // FMA nodes have flags that propagate to the created nodes.
17531   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17532   MatchContextClass matcher(DAG, TLI, N);
17533 
17534   // Constant fold FMA.
17535   if (SDValue C =
17536           DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
17537     return C;
17538 
17539   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17540   TargetLowering::NegatibleCost CostN0 =
17541       TargetLowering::NegatibleCost::Expensive;
17542   TargetLowering::NegatibleCost CostN1 =
17543       TargetLowering::NegatibleCost::Expensive;
17544   SDValue NegN0 =
17545       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17546   if (NegN0) {
17547     HandleSDNode NegN0Handle(NegN0);
17548     SDValue NegN1 =
17549         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17550     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17551                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
17552       return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17553   }
17554 
17555   // FIXME: use fast math flags instead of Options.UnsafeFPMath
17556   if (Options.UnsafeFPMath) {
17557     if (N0CFP && N0CFP->isZero())
17558       return N2;
17559     if (N1CFP && N1CFP->isZero())
17560       return N2;
17561   }
17562 
17563   // FIXME: Support splat of constant.
17564   if (N0CFP && N0CFP->isExactlyValue(1.0))
17565     return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
17566   if (N1CFP && N1CFP->isExactlyValue(1.0))
17567     return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17568 
17569   // Canonicalize (fma c, x, y) -> (fma x, c, y)
17570   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17571      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17572     return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
17573 
17574   bool CanReassociate =
17575       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17576   if (CanReassociate) {
17577     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17578     if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17579         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17580         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
17581       return matcher.getNode(
17582           ISD::FMUL, DL, VT, N0,
17583           matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17584     }
17585 
17586     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17587     if (matcher.match(N0, ISD::FMUL) &&
17588         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17589         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17590       return matcher.getNode(
17591           ISD::FMA, DL, VT, N0.getOperand(0),
17592           matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17593     }
17594   }
17595 
17596   // (fma x, 1, y) -> (fadd x, y) and (fma x, -1, y) -> (fadd (fneg x), y)
17597   // FIXME: Support splat of constant.
17598   if (N1CFP) {
17599     if (N1CFP->isExactlyValue(1.0))
17600       return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17601 
17602     if (N1CFP->isExactlyValue(-1.0) &&
17603         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17604       SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17605       AddToWorklist(RHSNeg.getNode());
17606       return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17607     }
17608 
17609     // fma (fneg x), K, y -> fma x, -K, y
17610     if (matcher.match(N0, ISD::FNEG) &&
17611         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17612          (N1.hasOneUse() &&
17613           !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17614       return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17615                              matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17616     }
17617   }
17618 
17619   // FIXME: Support splat of constant.
17620   if (CanReassociate) {
17621     // (fma x, c, x) -> (fmul x, (c+1))
17622     if (N1CFP && N0 == N2) {
17623       return matcher.getNode(ISD::FMUL, DL, VT, N0,
17624                              matcher.getNode(ISD::FADD, DL, VT, N1,
17625                                              DAG.getConstantFP(1.0, DL, VT)));
17626     }
17627 
17628     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17629     if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17630       return matcher.getNode(ISD::FMUL, DL, VT, N0,
17631                              matcher.getNode(ISD::FADD, DL, VT, N1,
17632                                              DAG.getConstantFP(-1.0, DL, VT)));
17633     }
17634   }
17635 
17636   // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
17637   // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
17638   if (!TLI.isFNegFree(VT))
17639     if (SDValue Neg = TLI.getCheaperNegatedExpression(
17640             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17641       return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17642   return SDValue();
17643 }
17644 
17645 SDValue DAGCombiner::visitFMAD(SDNode *N) {
17646   SDValue N0 = N->getOperand(0);
17647   SDValue N1 = N->getOperand(1);
17648   SDValue N2 = N->getOperand(2);
17649   EVT VT = N->getValueType(0);
17650   SDLoc DL(N);
17651 
17652   // Constant fold FMAD.
17653   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
17654     return C;
17655 
17656   return SDValue();
17657 }
17658 
17659 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
17660 // reciprocal.
17661 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17662 // Notice that this is not always beneficial. One reason is that different
17663 // targets may have different costs for FDIV and FMUL, so sometimes the cost
17664 // of two FDIVs is lower than that of one FDIV and two FMULs. Another reason
17665 // is that the critical path grows from "one FDIV" to "one FDIV + one FMUL".
17666 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17667   // TODO: Limit this transform based on optsize/minsize - it always creates at
17668   //       least 1 extra instruction. But the perf win may be substantial enough
17669   //       that only minsize should restrict this.
17670   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17671   const SDNodeFlags Flags = N->getFlags();
17672   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17673     return SDValue();
17674 
17675   // Skip if the current node is a reciprocal/fneg-reciprocal.
17676   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17677   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17678   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17679     return SDValue();
17680 
17681   // Exit early if the target does not want this transform or if there can't
17682   // possibly be enough uses of the divisor to make the transform worthwhile.
17683   unsigned MinUses = TLI.combineRepeatedFPDivisors();
17684 
17685   // For splat vectors, scale the number of uses by the splat factor. If we can
17686   // convert the division into a scalar op, that will likely be much faster.
17687   unsigned NumElts = 1;
17688   EVT VT = N->getValueType(0);
17689   if (VT.isVector() && DAG.isSplatValue(N1))
17690     NumElts = VT.getVectorMinNumElements();
17691 
17692   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17693     return SDValue();
17694 
17695   // Find all FDIV users of the same divisor.
17696   // Use a set because duplicates may be present in the user list.
17697   SetVector<SDNode *> Users;
17698   for (auto *U : N1->users()) {
17699     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17700       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17701       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17702           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17703           U->getFlags().hasAllowReassociation() &&
17704           U->getFlags().hasNoSignedZeros())
17705         continue;
17706 
17707       // This division is eligible for optimization only if global unsafe math
17708       // is enabled or if this division allows reciprocal formation.
17709       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17710         Users.insert(U);
17711     }
17712   }
17713 
17714   // Now that we have the actual number of divisor uses, make sure it meets
17715   // the minimum threshold specified by the target.
17716   if ((Users.size() * NumElts) < MinUses)
17717     return SDValue();
17718 
17719   SDLoc DL(N);
17720   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17721   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17722 
17723   // Dividend / Divisor -> Dividend * Reciprocal
17724   for (auto *U : Users) {
17725     SDValue Dividend = U->getOperand(0);
17726     if (Dividend != FPOne) {
17727       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17728                                     Reciprocal, Flags);
17729       CombineTo(U, NewNode);
17730     } else if (U != Reciprocal.getNode()) {
17731       // In the absence of fast-math-flags, this user node is always the
17732       // same node as Reciprocal, but with FMF they may be different nodes.
17733       CombineTo(U, Reciprocal);
17734     }
17735   }
17736   return SDValue(N, 0);  // N was replaced.
17737 }
17738 
17739 SDValue DAGCombiner::visitFDIV(SDNode *N) {
17740   SDValue N0 = N->getOperand(0);
17741   SDValue N1 = N->getOperand(1);
17742   EVT VT = N->getValueType(0);
17743   SDLoc DL(N);
17744   const TargetOptions &Options = DAG.getTarget().Options;
17745   SDNodeFlags Flags = N->getFlags();
17746   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17747 
17748   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17749     return R;
17750 
17751   // fold (fdiv c1, c2) -> c1/c2
17752   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17753     return C;
17754 
17755   // fold vector ops
17756   if (VT.isVector())
17757     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17758       return FoldedVOp;
17759 
17760   if (SDValue NewSel = foldBinOpIntoSelect(N))
17761     return NewSel;
17762 
17763   if (SDValue V = combineRepeatedFPDivisors(N))
17764     return V;
17765 
17766   // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17767   // the loss is acceptable with AllowReciprocal.
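        // For example, (fdiv X, 2.0) becomes (fmul X, 0.5) with no precision
        // loss, whereas the reciprocal of 3.0 is inexact, so (fdiv X, 3.0) is
        // only rewritten when reciprocal formation is allowed.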
17768   if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
17769     // Compute the reciprocal 1.0 / c2.
17770     const APFloat &N1APF = N1CFP->getValueAPF();
17771     APFloat Recip = APFloat::getOne(N1APF.getSemantics());
17772     APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17773     // Only do the transform if the reciprocal is a legal fp immediate that
17774     // isn't too nasty (e.g. NaN, denormal, ...).
17775     if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17776          (st == APFloat::opInexact &&
17777           (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17778         (!LegalOperations ||
17779          // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17780          // backend)... we should handle this gracefully after Legalize.
17781          // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17782          TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17783          TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17784       return DAG.getNode(ISD::FMUL, DL, VT, N0,
17785                          DAG.getConstantFP(Recip, DL, VT));
17786   }
17787 
17788   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17789     // If this FDIV is part of a reciprocal square root, it may be folded
17790     // into a target-specific square root estimate instruction.
17791     if (N1.getOpcode() == ISD::FSQRT) {
17792       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17793         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17794     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17795                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17796       if (SDValue RV =
17797               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17798         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17799         AddToWorklist(RV.getNode());
17800         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17801       }
17802     } else if (N1.getOpcode() == ISD::FP_ROUND &&
17803                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17804       if (SDValue RV =
17805               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17806         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17807         AddToWorklist(RV.getNode());
17808         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17809       }
17810     } else if (N1.getOpcode() == ISD::FMUL) {
17811       // Look through an FMUL. Even though this won't remove the FDIV directly,
17812       // it's still worthwhile to get rid of the FSQRT if possible.
17813       SDValue Sqrt, Y;
17814       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17815         Sqrt = N1.getOperand(0);
17816         Y = N1.getOperand(1);
17817       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17818         Sqrt = N1.getOperand(1);
17819         Y = N1.getOperand(0);
17820       }
17821       if (Sqrt.getNode()) {
17822         // If the other multiply operand is known positive, pull it into the
17823         // sqrt. That will eliminate the division if we convert to an estimate.
17824         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17825             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17826           SDValue A;
17827           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17828             A = Y.getOperand(0);
17829           else if (Y == Sqrt.getOperand(0))
17830             A = Y;
17831           if (A) {
17832             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17833             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17834             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17835             SDValue AAZ =
17836                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17837             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17838               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17839 
17840             // Estimate creation failed. Clean up speculatively created nodes.
17841             recursivelyDeleteUnusedNodes(AAZ.getNode());
17842           }
17843         }
17844 
17845         // We found an FSQRT, so try to make this fold:
17846         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
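        // Illustrative example (assuming the flags permit it): for
        // a / (b * sqrt(c)) this rewrites to a * (rsqrt_est(c) / b); the
        // divide by b remains, but the sqrt is replaced by an estimate.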
17847         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17848           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17849           AddToWorklist(Div.getNode());
17850           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17851         }
17852       }
17853     }
17854 
17855     // Fold into a reciprocal estimate and multiply instead of a real divide.
17856     if (Options.NoInfsFPMath || Flags.hasNoInfs())
17857       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17858         return RV;
17859   }
17860 
17861   // Fold X/Sqrt(X) -> Sqrt(X)
17862   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17863       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17864     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17865       return N1;
17866 
17867   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17868   TargetLowering::NegatibleCost CostN0 =
17869       TargetLowering::NegatibleCost::Expensive;
17870   TargetLowering::NegatibleCost CostN1 =
17871       TargetLowering::NegatibleCost::Expensive;
17872   SDValue NegN0 =
17873       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17874   if (NegN0) {
17875     HandleSDNode NegN0Handle(NegN0);
17876     SDValue NegN1 =
17877         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17878     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17879                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
17880       return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
17881   }
17882 
17883   if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17884     return R;
17885 
17886   return SDValue();
17887 }
17888 
17889 SDValue DAGCombiner::visitFREM(SDNode *N) {
17890   SDValue N0 = N->getOperand(0);
17891   SDValue N1 = N->getOperand(1);
17892   EVT VT = N->getValueType(0);
17893   SDNodeFlags Flags = N->getFlags();
17894   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17895   SDLoc DL(N);
17896 
17897   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17898     return R;
17899 
17900   // fold (frem c1, c2) -> fmod(c1,c2)
17901   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17902     return C;
17903 
17904   if (SDValue NewSel = foldBinOpIntoSelect(N))
17905     return NewSel;
17906 
17907   // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, provided N1 is an integer
17908   // power of 2.
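  // Worked example (illustrative): frem(-4.0, 2.0) gives Div = -2.0,
  // Rnd = -2.0, and N0 - Rnd*N1 = -4.0 - (-4.0) = +0.0, whereas frem itself
  // returns -0.0; the FCOPYSIGN from N0 below restores the correct sign.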
17909   if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17910       TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
17911       TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
17912       TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
17913       DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17914     bool NeedsCopySign =
17915         !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
17916     SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17917     SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17918     SDValue MLA;
17919     if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
17920       MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17921                         N1, N0);
17922     } else {
17923       SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17924       MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17925     }
17926     return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
17927   }
17928 
17929   return SDValue();
17930 }
17931 
17932 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17933   SDNodeFlags Flags = N->getFlags();
17934   const TargetOptions &Options = DAG.getTarget().Options;
17935 
17936   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17937   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17938   if (!Flags.hasApproximateFuncs() ||
17939       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17940     return SDValue();
17941 
17942   SDValue N0 = N->getOperand(0);
17943   if (TLI.isFsqrtCheap(N0, DAG))
17944     return SDValue();
17945 
17946   // FSQRT nodes have flags that propagate to the created nodes.
17947   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17948   //       transform the fdiv, we may produce a sub-optimal estimate sequence
17949   //       because the reciprocal calculation may not have to filter out a
17950   //       0.0 input.
17951   return buildSqrtEstimate(N0, Flags);
17952 }
17953 
17954 /// copysign(x, fp_extend(y)) -> copysign(x, y)
17955 /// copysign(x, fp_round(y)) -> copysign(x, y)
17956 /// The parameters are the types of X and Y, respectively.
17957 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17958   // Always fold no-op FP casts.
17959   if (XTy == YTy)
17960     return true;
17961 
17962   // Do not optimize out type conversion of f128 type yet.
17963   // For some targets like x86_64, configuration is changed to keep one f128
17964   // value in one SSE register, but instruction selection cannot handle
17965   // FCOPYSIGN on SSE registers yet.
17966   if (YTy == MVT::f128)
17967     return false;
17968 
17969   return !YTy.isVector() || EnableVectorFCopySignExtendRound;
17970 }
17971 
17972 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17973   SDValue N1 = N->getOperand(1);
17974   if (N1.getOpcode() != ISD::FP_EXTEND &&
17975       N1.getOpcode() != ISD::FP_ROUND)
17976     return false;
17977   EVT N1VT = N1->getValueType(0);
17978   EVT N1Op0VT = N1->getOperand(0).getValueType();
17979   return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17980 }
17981 
17982 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17983   SDValue N0 = N->getOperand(0);
17984   SDValue N1 = N->getOperand(1);
17985   EVT VT = N->getValueType(0);
17986   SDLoc DL(N);
17987 
17988   // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17989   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
17990     return C;
17991 
17992   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17993     const APFloat &V = N1C->getValueAPF();
17994     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
17995     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
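    // e.g. (illustrative) copysign(x, 2.0) -> fabs(x) and
    // copysign(x, -0.5) -> fneg(fabs(x)); only the sign of c1 matters.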
17996     if (!V.isNegative()) {
17997       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17998         return DAG.getNode(ISD::FABS, DL, VT, N0);
17999     } else {
18000       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18001         return DAG.getNode(ISD::FNEG, DL, VT,
18002                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
18003     }
18004   }
18005 
18006   // copysign(fabs(x), y) -> copysign(x, y)
18007   // copysign(fneg(x), y) -> copysign(x, y)
18008   // copysign(copysign(x,z), y) -> copysign(x, y)
18009   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
18010       N0.getOpcode() == ISD::FCOPYSIGN)
18011     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0.getOperand(0), N1);
18012 
18013   // copysign(x, abs(y)) -> abs(x)
18014   if (N1.getOpcode() == ISD::FABS)
18015     return DAG.getNode(ISD::FABS, DL, VT, N0);
18016 
18017   // copysign(x, copysign(y,z)) -> copysign(x, z)
18018   if (N1.getOpcode() == ISD::FCOPYSIGN)
18019     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(1));
18020 
18021   // copysign(x, fp_extend(y)) -> copysign(x, y)
18022   // copysign(x, fp_round(y)) -> copysign(x, y)
18023   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18024     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18025 
18026   // We only take the sign bit from the sign operand.
18027   EVT SignVT = N1.getValueType();
18028   if (SimplifyDemandedBits(N1,
18029                            APInt::getSignMask(SignVT.getScalarSizeInBits())))
18030     return SDValue(N, 0);
18031 
18032   // We only take the non-sign bits from the value operand.
18033   if (SimplifyDemandedBits(N0,
18034                            APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
18035     return SDValue(N, 0);
18036 
18037   return SDValue();
18038 }
18039 
18040 SDValue DAGCombiner::visitFPOW(SDNode *N) {
18041   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18042   if (!ExponentC)
18043     return SDValue();
18044   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18045 
18046   // Try to convert x ** (1/3) into cube root.
18047   // TODO: Handle the various flavors of long double.
18048   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18049   //       Some range near 1/3 should be fine.
18050   EVT VT = N->getValueType(0);
18051   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18052       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18053     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18054     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18055     // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
18056     // For regular numbers, rounding may cause the results to differ.
18057     // Therefore, we require { nsz ninf nnan afn } for this transform.
18058     // TODO: We could select out the special cases if we don't have nsz/ninf.
18059     SDNodeFlags Flags = N->getFlags();
18060     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18061         !Flags.hasApproximateFuncs())
18062       return SDValue();
18063 
18064     // Do not create a cbrt() libcall if the target does not have it, and do not
18065     // turn a pow that has lowering support into a cbrt() libcall.
18066     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18067         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
18068          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
18069       return SDValue();
18070 
18071     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18072   }
18073 
18074   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18075   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18076   // TODO: This could be extended (using a target hook) to handle smaller
18077   // power-of-2 fractional exponents.
18078   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18079   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18080   if (ExponentIs025 || ExponentIs075) {
18081     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18082     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
18083     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18084     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
18085     // For regular numbers, rounding may cause the results to differ.
18086     // Therefore, we require { nsz ninf afn } for this transform.
18087     // TODO: We could select out the special cases if we don't have nsz/ninf.
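    // The identities used below (illustrative): X**0.25 == sqrt(sqrt(X)), and
    // X**0.75 == X**0.5 * X**0.25 == sqrt(X) * sqrt(sqrt(X)).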
18088     SDNodeFlags Flags = N->getFlags();
18089 
18090     // We only need no signed zeros for the 0.25 case.
18091     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18092         !Flags.hasApproximateFuncs())
18093       return SDValue();
18094 
18095     // Don't double the number of libcalls. We are trying to inline fast code.
18096     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
18097       return SDValue();
18098 
18099     // Assume that libcalls are the smallest code.
18100     // TODO: This restriction should probably be lifted for vectors.
18101     if (ForCodeSize)
18102       return SDValue();
18103 
18104     // pow(X, 0.25) --> sqrt(sqrt(X))
18105     SDLoc DL(N);
18106     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18107     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18108     if (ExponentIs025)
18109       return SqrtSqrt;
18110     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18111     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18112   }
18113 
18114   return SDValue();
18115 }
18116 
18117 static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
18118                                const TargetLowering &TLI) {
18119   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
18120   // replacing casts with a libcall. We also must be allowed to ignore -0.0
18121   // because FTRUNC will return -0.0 for inputs in (-1.0, -0.0), but using
18122   // integer conversions would return +0.0.
18123   // FIXME: We should be able to use node-level FMF here.
18124   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
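  // Illustrative example: ftrunc(-0.5) == -0.0, but the round trip
  // sitofp(fptosi(-0.5)) == sitofp(0) == +0.0, hence the NoSignedZerosFPMath
  // check below.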
18125   EVT VT = N->getValueType(0);
18126   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
18127       !DAG.getTarget().Options.NoSignedZerosFPMath)
18128     return SDValue();
18129 
18130   // fptosi/fptoui round towards zero, so converting from FP to integer and
18131   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
18132   SDValue N0 = N->getOperand(0);
18133   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18134       N0.getOperand(0).getValueType() == VT)
18135     return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18136 
18137   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18138       N0.getOperand(0).getValueType() == VT)
18139     return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18140 
18141   return SDValue();
18142 }
18143 
18144 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18145   SDValue N0 = N->getOperand(0);
18146   EVT VT = N->getValueType(0);
18147   EVT OpVT = N0.getValueType();
18148   SDLoc DL(N);
18149 
18150   // [us]itofp(undef) = 0, because the result value is bounded.
18151   if (N0.isUndef())
18152     return DAG.getConstantFP(0.0, DL, VT);
18153 
18154   // fold (sint_to_fp c1) -> c1fp
18155   // ...but only if the target supports immediate floating-point values
18156   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18157     if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18158       return C;
18159 
18160   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18161   // but UINT_TO_FP is legal on this target, try to convert.
18162   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18163       hasOperation(ISD::UINT_TO_FP, OpVT)) {
18164     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18165     if (DAG.SignBitIsZero(N0))
18166       return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18167   }
18168 
18169   // The next optimizations are desirable only if SELECT_CC can be lowered.
18170   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
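  // (Illustrative reasoning: an i1 setcc result that is true sign-extends to
  // the integer -1, and sint_to_fp(-1) == -1.0, so the fold selects between
  // -1.0 and 0.0.)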
18171   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18172       !VT.isVector() &&
18173       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18174     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18175                          DAG.getConstantFP(0.0, DL, VT));
18176 
18177   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18178   //      (select (setcc x, y, cc), 1.0, 0.0)
18179   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18180       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18181       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18182     return DAG.getSelect(DL, VT, N0.getOperand(0),
18183                          DAG.getConstantFP(1.0, DL, VT),
18184                          DAG.getConstantFP(0.0, DL, VT));
18185 
18186   if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18187     return FTrunc;
18188 
18189   return SDValue();
18190 }
18191 
18192 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18193   SDValue N0 = N->getOperand(0);
18194   EVT VT = N->getValueType(0);
18195   EVT OpVT = N0.getValueType();
18196   SDLoc DL(N);
18197 
18198   // [us]itofp(undef) = 0, because the result value is bounded.
18199   if (N0.isUndef())
18200     return DAG.getConstantFP(0.0, DL, VT);
18201 
18202   // fold (uint_to_fp c1) -> c1fp
18203   // ...but only if the target supports immediate floating-point values
18204   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18205     if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18206       return C;
18207 
18208   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18209   // but SINT_TO_FP is legal on this target, try to convert.
18210   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18211       hasOperation(ISD::SINT_TO_FP, OpVT)) {
18212     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18213     if (DAG.SignBitIsZero(N0))
18214       return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18215   }
18216 
18217   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18218   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18219       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18220     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18221                          DAG.getConstantFP(0.0, DL, VT));
18222 
18223   if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18224     return FTrunc;
18225 
18226   return SDValue();
18227 }
18228 
18229 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18230 static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
18231   SDValue N0 = N->getOperand(0);
18232   EVT VT = N->getValueType(0);
18233 
18234   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
18235     return SDValue();
18236 
18237   SDValue Src = N0.getOperand(0);
18238   EVT SrcVT = Src.getValueType();
18239   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
18240   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18241 
18242   // We can safely assume the conversion won't overflow the output range,
18243   // because (for example) (uint8_t)18293.f is undefined behavior.
18244 
18245   // Since we can assume the conversion won't overflow, our decision as to
18246   // whether the input will fit in the float should depend on the minimum
18247   // of the input range and output range.
18248 
18249   // This means this is also safe for a signed input and unsigned output, since
18250   // a negative input would lead to undefined behavior.
18251   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18252   unsigned OutputSize = (int)VT.getScalarSizeInBits();
18253   unsigned ActualSize = std::min(InputSize, OutputSize);
18254   const fltSemantics &Sem = N0.getValueType().getFltSemantics();
18255 
18256   // We can only fold away the float conversion if the input range can be
18257   // represented exactly in the float range.
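  // Worked example (illustrative): fp_to_sint i32 (sint_to_fp f32 (x:i16))
  // has ActualSize = min(16 - 1, 32) = 15, which fits in f32's 24-bit
  // significand, so it folds to (sign_extend x); a signed i32 source via f32
  // would give ActualSize = 31 > 24 and is left alone.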
18258   if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
18259     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
18260       unsigned ExtOp =
18261           IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18262       return DAG.getNode(ExtOp, DL, VT, Src);
18263     }
18264     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
18265       return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
18266     return DAG.getBitcast(VT, Src);
18267   }
18268   return SDValue();
18269 }
18270 
18271 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
18272   SDValue N0 = N->getOperand(0);
18273   EVT VT = N->getValueType(0);
18274   SDLoc DL(N);
18275 
18276   // fold (fp_to_sint undef) -> undef
18277   if (N0.isUndef())
18278     return DAG.getUNDEF(VT);
18279 
18280   // fold (fp_to_sint c1fp) -> c1
18281   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
18282     return C;
18283 
18284   return FoldIntToFPToInt(N, DL, DAG);
18285 }
18286 
18287 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
18288   SDValue N0 = N->getOperand(0);
18289   EVT VT = N->getValueType(0);
18290   SDLoc DL(N);
18291 
18292   // fold (fp_to_uint undef) -> undef
18293   if (N0.isUndef())
18294     return DAG.getUNDEF(VT);
18295 
18296   // fold (fp_to_uint c1fp) -> c1
18297   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
18298     return C;
18299 
18300   return FoldIntToFPToInt(N, DL, DAG);
18301 }
18302 
18303 SDValue DAGCombiner::visitXROUND(SDNode *N) {
18304   SDValue N0 = N->getOperand(0);
18305   EVT VT = N->getValueType(0);
18306 
18307   // fold (lrint|llrint undef) -> undef
18308   // fold (lround|llround undef) -> undef
18309   if (N0.isUndef())
18310     return DAG.getUNDEF(VT);
18311 
18312   // fold (lrint|llrint c1fp) -> c1
18313   // fold (lround|llround c1fp) -> c1
18314   if (SDValue C =
18315           DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
18316     return C;
18317 
18318   return SDValue();
18319 }
18320 
18321 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
18322   SDValue N0 = N->getOperand(0);
18323   SDValue N1 = N->getOperand(1);
18324   EVT VT = N->getValueType(0);
18325   SDLoc DL(N);
18326 
18327   // fold (fp_round c1fp) -> c1fp
18328   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
18329     return C;
18330 
18331   // fold (fp_round (fp_extend x)) -> x
18332   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
18333     return N0.getOperand(0);
18334 
18335   // fold (fp_round (fp_round x)) -> (fp_round x)
18336   if (N0.getOpcode() == ISD::FP_ROUND) {
18337     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
18338     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
18339 
18340     // Avoid folding legal fp_rounds into non-legal ones.
18341     if (!hasOperation(ISD::FP_ROUND, VT))
18342       return SDValue();
18343 
18344     // Skip this folding if it results in an fp_round from f80 to f16.
18345     //
18346     // f80 to f16 always generates an expensive (and as yet, unimplemented)
18347     // libcall to __truncxfhf2 instead of selecting native f16 conversion
18348     // instructions from f32 or f64.  Moreover, the first (value-preserving)
18349     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
18350     // x86.
18351     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
18352       return SDValue();
18353 
18354     // If the first fp_round isn't a value preserving truncation, it might
18355     // introduce a tie in the second fp_round that wouldn't occur in the
18356     // single-step fp_round we want to fold to.
18357     // In other words, double rounding isn't the same as rounding.
18358     // Also, this is a value-preserving truncation iff both fp_rounds are.
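    // (Illustrative hazard: rounding f64->f32 can land exactly halfway
    // between two adjacent f16 values, and the second rounding then resolves
    // a tie that a single f64->f16 rounding would never see.)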
18359     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
18360       return DAG.getNode(
18361           ISD::FP_ROUND, DL, VT, N0.getOperand(0),
18362           DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
18363   }
18364 
18365   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
18366   // Note: From a legality perspective, this is a two step transform.  First,
18367   // we duplicate the fp_round to the arguments of the copysign, then we
18368   // eliminate the fp_round on Y.  The second step requires an additional
18369   // predicate to match the implementation above.
18370   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
18371       CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
18372                                        N0.getValueType())) {
18373     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
18374                               N0.getOperand(0), N1);
18375     AddToWorklist(Tmp.getNode());
18376     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
18377   }
18378 
18379   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
18380     return NewVSel;
18381 
18382   return SDValue();
18383 }
18384 
18385 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
18386   SDValue N0 = N->getOperand(0);
18387   EVT VT = N->getValueType(0);
18388   SDLoc DL(N);
18389 
18390   if (VT.isVector())
18391     if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
18392       return FoldedVOp;
18393 
18394   // If this is fp_round(fp_extend x), don't fold here; let the fp_round fold it.
18395   if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
18396     return SDValue();
18397 
18398   // fold (fp_extend c1fp) -> c1fp
18399   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
18400     return C;
18401 
18402   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
18403   if (N0.getOpcode() == ISD::FP16_TO_FP &&
18404       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
18405     return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
18406 
18407   // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
18408   // value of X.
18409   if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
18410     SDValue In = N0.getOperand(0);
18411     if (In.getValueType() == VT) return In;
18412     if (VT.bitsLT(In.getValueType()))
18413       return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
18414     return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
18415   }
18416 
18417   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
18418   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
18419       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
18420     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
18421     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
18422                                      LN0->getChain(),
18423                                      LN0->getBasePtr(), N0.getValueType(),
18424                                      LN0->getMemOperand());
18425     CombineTo(N, ExtLoad);
18426     CombineTo(
18427         N0.getNode(),
18428         DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
18429                     DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
18430         ExtLoad.getValue(1));
18431     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
18432   }
18433 
18434   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
18435     return NewVSel;
18436 
18437   return SDValue();
18438 }
18439 
18440 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
18441   SDValue N0 = N->getOperand(0);
18442   EVT VT = N->getValueType(0);
18443 
18444   // fold (fceil c1) -> fceil(c1)
18445   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
18446     return C;
18447 
18448   return SDValue();
18449 }
18450 
18451 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
18452   SDValue N0 = N->getOperand(0);
18453   EVT VT = N->getValueType(0);
18454 
18455   // fold (ftrunc c1) -> ftrunc(c1)
18456   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
18457     return C;
18458 
18459   // fold ftrunc (known rounded int x) -> x
18460   // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
18461   // likely to be generated when extracting an integer from a rounded value.
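  // e.g. (illustrative) ftrunc(ffloor(x)) == ffloor(x), since ffloor already
  // yields an integer-valued float that ftrunc leaves unchanged.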
18462   switch (N0.getOpcode()) {
18463   default: break;
18464   case ISD::FRINT:
18465   case ISD::FTRUNC:
18466   case ISD::FNEARBYINT:
18467   case ISD::FROUNDEVEN:
18468   case ISD::FFLOOR:
18469   case ISD::FCEIL:
18470     return N0;
18471   }
18472 
18473   return SDValue();
18474 }
18475 
18476 SDValue DAGCombiner::visitFFREXP(SDNode *N) {
18477   SDValue N0 = N->getOperand(0);
18478 
18479   // fold (ffrexp c1) -> ffrexp(c1)
18480   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18481     return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18482   return SDValue();
18483 }
18484 
18485 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
18486   SDValue N0 = N->getOperand(0);
18487   EVT VT = N->getValueType(0);
18488 
18489   // fold (ffloor c1) -> ffloor(c1)
18490   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
18491     return C;
18492 
18493   return SDValue();
18494 }
18495 
18496 SDValue DAGCombiner::visitFNEG(SDNode *N) {
18497   SDValue N0 = N->getOperand(0);
18498   EVT VT = N->getValueType(0);
18499   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18500 
18501   // Constant fold FNEG.
18502   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
18503     return C;
18504 
18505   if (SDValue NegN0 =
18506           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
18507     return NegN0;
18508 
18509   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18510   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
18511   // know it was called from a context with a nsz flag if the input fsub does
18512   // not.
18513   if (N0.getOpcode() == ISD::FSUB &&
18514       (DAG.getTarget().Options.NoSignedZerosFPMath ||
18515        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18516     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
18517                        N0.getOperand(0));
18518   }
18519 
18520   if (SDValue Cast = foldSignChangeInBitcast(N))
18521     return Cast;
18522 
18523   return SDValue();
18524 }
18525 
18526 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
18527   SDValue N0 = N->getOperand(0);
18528   SDValue N1 = N->getOperand(1);
18529   EVT VT = N->getValueType(0);
18530   const SDNodeFlags Flags = N->getFlags();
18531   unsigned Opc = N->getOpcode();
18532   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
18533   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
18534   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18535 
18536   // Constant fold.
18537   if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18538     return C;
18539 
18540   // Canonicalize to constant on RHS.
18541   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18542       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18543     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18544 
18545   if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18546     const APFloat &AF = N1CFP->getValueAPF();
18547 
18548     // minnum(X, nan) -> X
18549     // maxnum(X, nan) -> X
18550     // minimum(X, nan) -> nan
18551     // maximum(X, nan) -> nan
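    // (Illustrative: the *num forms follow IEEE-754 minNum/maxNum and return
    // the non-NaN operand, while minimum/maximum propagate a NaN input.)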
18552     if (AF.isNaN())
18553       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18554 
18555     // In the following folds, inf can be replaced with the largest finite
18556     // float, if the ninf flag is set.
18557     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18558       // minnum(X, -inf) -> -inf
18559       // maxnum(X, +inf) -> +inf
18560       // minimum(X, -inf) -> -inf if nnan
18561       // maximum(X, +inf) -> +inf if nnan
18562       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18563         return N->getOperand(1);
18564 
18565       // minnum(X, +inf) -> X if nnan
18566       // maxnum(X, -inf) -> X if nnan
18567       // minimum(X, +inf) -> X
18568       // maximum(X, -inf) -> X
18569       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18570         return N->getOperand(0);
18571     }
18572   }
18573 
18574   if (SDValue SD = reassociateReduction(
18575           PropagatesNaN
18576               ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
18577               : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
18578           Opc, SDLoc(N), VT, N0, N1, Flags))
18579     return SD;
18580 
18581   return SDValue();
18582 }
18583 
18584 SDValue DAGCombiner::visitFABS(SDNode *N) {
18585   SDValue N0 = N->getOperand(0);
18586   EVT VT = N->getValueType(0);
18587   SDLoc DL(N);
18588 
18589   // fold (fabs c1) -> fabs(c1)
18590   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
18591     return C;
18592 
18593   // fold (fabs (fabs x)) -> (fabs x)
18594   if (N0.getOpcode() == ISD::FABS)
18595     return N->getOperand(0);
18596 
18597   // fold (fabs (fneg x)) -> (fabs x)
18598   // fold (fabs (fcopysign x, y)) -> (fabs x)
18599   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18600     return DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
18601 
18602   if (SDValue Cast = foldSignChangeInBitcast(N))
18603     return Cast;
18604 
18605   return SDValue();
18606 }
18607 
18608 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18609   SDValue Chain = N->getOperand(0);
18610   SDValue N1 = N->getOperand(1);
18611   SDValue N2 = N->getOperand(2);
18612 
18613   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18614   // nondeterministic jumps).
18615   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18616     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18617                        N1->getOperand(0), N2, N->getFlags());
18618   }
18619 
18620   // Variant of the previous fold where there is a SETCC in between:
18621   //   BRCOND(SETCC(FREEZE(X), CONST, Cond))
18622   // =>
18623   //   BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18624   // =>
18625   //   BRCOND(SETCC(X, CONST, Cond))
18626   // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18627   // isn't equivalent to true or false.
18628   // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18629   // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18630   if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18631     SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18632     ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18633     ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18634     ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18635     bool Updated = false;
18636 
18637     // Is 'X Cond C' always true or false?
18638     auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18639       bool False = (Cond == ISD::SETULT && C->isZero()) ||
18640                    (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18641                    (Cond == ISD::SETUGT && C->isAllOnes()) ||
18642                    (Cond == ISD::SETGT && C->isMaxSignedValue());
18643       bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18644                   (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18645                   (Cond == ISD::SETUGE && C->isZero()) ||
18646                   (Cond == ISD::SETGE && C->isMinSignedValue());
18647       return True || False;
18648     };
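    // For example (illustrative): with Cond == SETULT and C == 0, "X u< 0" is
    // false for every X, so SETCC(FREEZE(X), 0, SETULT) is the constant
    // false, whereas dropping the freeze would yield poison for poison X.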
18649 
18650     if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18651       if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18652         S0 = S0->getOperand(0);
18653         Updated = true;
18654       }
18655     }
18656     if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18657       if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18658         S1 = S1->getOperand(0);
18659         Updated = true;
18660       }
18661     }
18662 
18663     if (Updated)
18664       return DAG.getNode(
18665           ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18666           DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
18667           N->getFlags());
18668   }
18669 
18670   // If N is a constant we could fold this into a fallthrough or unconditional
18671   // branch. However that doesn't happen very often in normal code, because
18672   // Instcombine/SimplifyCFG should have handled the available opportunities.
18673   // If we did this folding here, it would be necessary to update the
18674   // MachineBasicBlock CFG, which is awkward.
18675 
18676   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18677   // on the target.
18678   if (N1.getOpcode() == ISD::SETCC &&
18679       TLI.isOperationLegalOrCustom(ISD::BR_CC,
18680                                    N1.getOperand(0).getValueType())) {
18681     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18682                        Chain, N1.getOperand(2),
18683                        N1.getOperand(0), N1.getOperand(1), N2);
18684   }
18685 
18686   if (N1.hasOneUse()) {
18687     // rebuildSetCC calls visitXor which may change the Chain when there is a
18688     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18689     HandleSDNode ChainHandle(Chain);
18690     if (SDValue NewN1 = rebuildSetCC(N1))
18691       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18692                          ChainHandle.getValue(), NewN1, N2, N->getFlags());
18693   }
18694 
18695   return SDValue();
18696 }
18697 
18698 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18699   if (N.getOpcode() == ISD::SRL ||
18700       (N.getOpcode() == ISD::TRUNCATE &&
18701        (N.getOperand(0).hasOneUse() &&
18702         N.getOperand(0).getOpcode() == ISD::SRL))) {
18703     // Look past the truncate.
18704     if (N.getOpcode() == ISD::TRUNCATE)
18705       N = N.getOperand(0);
18706 
18707     // Match this pattern so that we can generate simpler code:
18708     //
18709     //   %a = ...
18710     //   %b = and i32 %a, 2
18711     //   %c = srl i32 %b, 1
18712     //   brcond i32 %c ...
18713     //
18714     // into
18715     //
18716     //   %a = ...
18717     //   %b = and i32 %a, 2
18718     //   %c = setcc eq %b, 0
18719     //   brcond %c ...
18720     //
18721     // This applies only when the AND constant value has one bit set and the
18722     // SRL constant is equal to the log2 of the AND constant. The back-end is
18723     // smart enough to convert the result into a TEST/JMP sequence.
18724     SDValue Op0 = N.getOperand(0);
18725     SDValue Op1 = N.getOperand(1);
18726 
18727     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18728       SDValue AndOp1 = Op0.getOperand(1);
18729 
18730       if (AndOp1.getOpcode() == ISD::Constant) {
18731         const APInt &AndConst = AndOp1->getAsAPIntVal();
18732 
18733         if (AndConst.isPowerOf2() &&
18734             Op1->getAsAPIntVal() == AndConst.logBase2()) {
18735           SDLoc DL(N);
18736           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18737                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18738                               ISD::SETNE);
18739         }
18740       }
18741     }
18742   }
18743 
18744   // Transform (brcond (xor x, y)) -> (brcond (setcc x, y, ne))
18745   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18746   if (N.getOpcode() == ISD::XOR) {
18747     // Because we may call this on a speculatively constructed
18748     // SimplifiedSetCC Node, we need to simplify this node first.
18749     // Ideally this should be folded into SimplifySetCC and not
18750     // here. For now, grab a handle to N so we don't lose it from
18751     // replacements internal to the visit.
18752     HandleSDNode XORHandle(N);
18753     while (N.getOpcode() == ISD::XOR) {
18754       SDValue Tmp = visitXOR(N.getNode());
18755       // No simplification done.
18756       if (!Tmp.getNode())
18757         break;
18758       // Returning N is a form of in-visit replacement that may invalidate
18759       // N, so grab the value back from the handle.
18760       if (Tmp.getNode() == N.getNode())
18761         N = XORHandle.getValue();
18762       else // Node simplified. Try simplifying again.
18763         N = Tmp;
18764     }
18765 
18766     if (N.getOpcode() != ISD::XOR)
18767       return N;
18768 
18769     SDValue Op0 = N->getOperand(0);
18770     SDValue Op1 = N->getOperand(1);
18771 
18772     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18773       bool Equal = false;
18774       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18775       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18776           Op0.getValueType() == MVT::i1) {
18777         N = Op0;
18778         Op0 = N->getOperand(0);
18779         Op1 = N->getOperand(1);
18780         Equal = true;
18781       }
18782 
18783       EVT SetCCVT = N.getValueType();
18784       if (LegalTypes)
18785         SetCCVT = getSetCCResultType(SetCCVT);
18786       // Replace the uses of XOR with SETCC. Note, avoid this transformation if
18787       // it would introduce illegal operations post-legalization as this can
18788       // result in infinite looping between converting xor->setcc here, and
18789       // expanding setcc->xor in LegalizeSetCCCondCode if requested.
18790       const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
18791       if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
18792         return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
18793     }
18794   }
18795 
18796   return SDValue();
18797 }
18798 
18799 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18800 //
18801 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18802   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18803   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18804 
18805   // If N is a constant we could fold this into a fallthrough or unconditional
18806   // branch. However that doesn't happen very often in normal code, because
18807   // Instcombine/SimplifyCFG should have handled the available opportunities.
18808   // If we did this folding here, it would be necessary to update the
18809   // MachineBasicBlock CFG, which is awkward.
18810 
18811   // Use SimplifySetCC to simplify SETCC's.
18812   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18813                                CondLHS, CondRHS, CC->get(), SDLoc(N),
18814                                false);
18815   if (Simp.getNode()) AddToWorklist(Simp.getNode());
18816 
18817   // fold to a simpler setcc
18818   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18819     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18820                        N->getOperand(0), Simp.getOperand(2),
18821                        Simp.getOperand(0), Simp.getOperand(1),
18822                        N->getOperand(4));
18823 
18824   return SDValue();
18825 }
18826 
18827 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18828                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18829                                      const TargetLowering &TLI) {
18830   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18831     if (LD->isIndexed())
18832       return false;
18833     EVT VT = LD->getMemoryVT();
18834     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18835       return false;
18836     Ptr = LD->getBasePtr();
18837   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18838     if (ST->isIndexed())
18839       return false;
18840     EVT VT = ST->getMemoryVT();
18841     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18842       return false;
18843     Ptr = ST->getBasePtr();
18844     IsLoad = false;
18845   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18846     if (LD->isIndexed())
18847       return false;
18848     EVT VT = LD->getMemoryVT();
18849     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18850         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18851       return false;
18852     Ptr = LD->getBasePtr();
18853     IsMasked = true;
18854   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18855     if (ST->isIndexed())
18856       return false;
18857     EVT VT = ST->getMemoryVT();
18858     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18859         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18860       return false;
18861     Ptr = ST->getBasePtr();
18862     IsLoad = false;
18863     IsMasked = true;
18864   } else {
18865     return false;
18866   }
18867   return true;
18868 }
18869 
18870 /// Try turning a load/store into a pre-indexed load/store when the base
18871 /// pointer is an add or subtract and it has other uses besides the load/store.
18872 /// After the transformation, the new indexed load/store has effectively folded
18873 /// the add/subtract in and all of its other uses are redirected to the
18874 /// new load/store.
18875 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18876   if (Level < AfterLegalizeDAG)
18877     return false;
18878 
18879   bool IsLoad = true;
18880   bool IsMasked = false;
18881   SDValue Ptr;
18882   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18883                                 Ptr, TLI))
18884     return false;
18885 
18886   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18887   // out.  There is no reason to make this a preinc/predec.
18888   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18889       Ptr->hasOneUse())
18890     return false;
18891 
18892   // Ask the target to do addressing mode selection.
18893   SDValue BasePtr;
18894   SDValue Offset;
18895   ISD::MemIndexedMode AM = ISD::UNINDEXED;
18896   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18897     return false;
18898 
18899   // Backends without true r+i pre-indexed forms may need to pass a
18900   // constant base with a variable offset so that constant coercion
18901   // will work with the patterns in canonical form.
18902   bool Swapped = false;
18903   if (isa<ConstantSDNode>(BasePtr)) {
18904     std::swap(BasePtr, Offset);
18905     Swapped = true;
18906   }
18907 
18908   // Don't create an indexed load / store with zero offset.
18909   if (isNullConstant(Offset))
18910     return false;
18911 
18912   // Try turning it into a pre-indexed load / store except when:
18913   // 1) The new base ptr is a frame index.
18914   // 2) If N is a store and the new base ptr is either the same as or is a
18915   //    predecessor of the value being stored.
18916   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18917   //    that would create a cycle.
18918   // 4) All uses are load / store ops that use it as old base ptr.
18919 
18920   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
18921   // (plus the implicit offset) to a register to preinc anyway.
18922   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18923     return false;
18924 
18925   // Check #2.
18926   if (!IsLoad) {
18927     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18928                            : cast<StoreSDNode>(N)->getValue();
18929 
18930     // Would require a copy.
18931     if (Val == BasePtr)
18932       return false;
18933 
18934     // Would create a cycle.
18935     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18936       return false;
18937   }
18938 
18939   // Caches for hasPredecessorHelper.
18940   SmallPtrSet<const SDNode *, 32> Visited;
18941   SmallVector<const SDNode *, 16> Worklist;
18942   Worklist.push_back(N);
18943 
18944   // If the offset is a constant, there may be other adds of constants that
18945   // can be folded with this one. We should do this to avoid having to keep
18946   // a copy of the original base pointer.
18947   SmallVector<SDNode *, 16> OtherUses;
18948   unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
18949   if (isa<ConstantSDNode>(Offset))
18950     for (SDUse &Use : BasePtr->uses()) {
18951       // Skip the use that is Ptr and uses of other results from BasePtr's
18952       // node (important for nodes that return multiple results).
18953       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18954         continue;
18955 
18956       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18957                                        MaxSteps))
18958         continue;
18959 
18960       if (Use.getUser()->getOpcode() != ISD::ADD &&
18961           Use.getUser()->getOpcode() != ISD::SUB) {
18962         OtherUses.clear();
18963         break;
18964       }
18965 
18966       SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
18967       if (!isa<ConstantSDNode>(Op1)) {
18968         OtherUses.clear();
18969         break;
18970       }
18971 
18972       // FIXME: In some cases, we can be smarter about this.
18973       if (Op1.getValueType() != Offset.getValueType()) {
18974         OtherUses.clear();
18975         break;
18976       }
18977 
18978       OtherUses.push_back(Use.getUser());
18979     }
18980 
18981   if (Swapped)
18982     std::swap(BasePtr, Offset);
18983 
18984   // Now check for #3 and #4.
18985   bool RealUse = false;
18986 
18987   for (SDNode *User : Ptr->users()) {
18988     if (User == N)
18989       continue;
18990     if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
18991       return false;
18992 
18993     // If Ptr may be folded into the addressing mode of another use, then it's
18994     // not profitable to do this transformation.
18995     if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
18996       RealUse = true;
18997   }
18998 
18999   if (!RealUse)
19000     return false;
19001 
19002   SDValue Result;
19003   if (!IsMasked) {
19004     if (IsLoad)
19005       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19006     else
19007       Result =
19008           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19009   } else {
19010     if (IsLoad)
19011       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19012                                         Offset, AM);
19013     else
19014       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19015                                          Offset, AM);
19016   }
19017   ++PreIndexedNodes;
19018   ++NodesCombined;
19019   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19020              Result.dump(&DAG); dbgs() << '\n');
19021   WorklistRemover DeadNodes(*this);
19022   if (IsLoad) {
19023     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19024     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19025   } else {
19026     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19027   }
19028 
19029   // Finally, since the node is now dead, remove it from the graph.
19030   deleteAndRecombine(N);
19031 
19032   if (Swapped)
19033     std::swap(BasePtr, Offset);
19034 
19035   // Replace other uses of BasePtr that can be updated to use Ptr
19036   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
19037     unsigned OffsetIdx = 1;
19038     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19039       OffsetIdx = 0;
19040     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
19041            BasePtr.getNode() && "Expected BasePtr operand");
19042 
19043     // We need to replace ptr0 in the following expression:
19044     //   x0 * offset0 + y0 * ptr0 = t0
19045     // knowing that
19046     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19047     //
19048     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19049     // indexed load/store and the expression that needs to be re-written.
19050     //
19051     // Therefore, we have:
19052     //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
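    //
    // Worked example (illustrative): for a PRE_INC form with Offset1 = 16
    // (x1 = y1 = 1) and another use t0 = ptr0 + 4 (x0 = y0 = 1), this gives
    // t0 = (4 - 16) + t1, i.e. ADD(t1, -12).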
19053 
19054     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
19055     const APInt &Offset0 = CN->getAPIntValue();
19056     const APInt &Offset1 = Offset->getAsAPIntVal();
19057     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19058     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19059     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19060     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19061 
19062     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19063 
19064     APInt CNV = Offset0;
19065     if (X0 < 0) CNV = -CNV;
19066     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19067     else CNV = CNV - Offset1;
19068 
19069     SDLoc DL(OtherUses[i]);
19070 
19071     // We can now generate the new expression.
19072     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19073     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19074 
19075     SDValue NewUse = DAG.getNode(Opcode,
19076                                  DL,
19077                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
19078     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
19079     deleteAndRecombine(OtherUses[i]);
19080   }
19081 
19082   // Replace the uses of Ptr with uses of the updated base value.
19083   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19084   deleteAndRecombine(Ptr.getNode());
19085   AddToWorklist(Result.getNode());
19086 
19087   return true;
19088 }
19089 
19090 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19091                                    SDValue &BasePtr, SDValue &Offset,
19092                                    ISD::MemIndexedMode &AM,
19093                                    SelectionDAG &DAG,
19094                                    const TargetLowering &TLI) {
19095   if (PtrUse == N ||
19096       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19097     return false;
19098 
19099   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19100     return false;
19101 
19102   // Don't create an indexed load / store with zero offset.
19103   if (isNullConstant(Offset))
19104     return false;
19105 
19106   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19107     return false;
19108 
19109   SmallPtrSet<const SDNode *, 32> Visited;
19110   unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
19111   for (SDNode *User : BasePtr->users()) {
19112     if (User == Ptr.getNode())
19113       continue;
19114 
19115     // Bail out if there's a later user which could perform the indexing instead.
19116     if (isa<MemSDNode>(User)) {
19117       bool IsLoad = true;
19118       bool IsMasked = false;
19119       SDValue OtherPtr;
19120       if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19121                                    IsMasked, OtherPtr, TLI)) {
19122         SmallVector<const SDNode *, 2> Worklist;
19123         Worklist.push_back(User);
19124         if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19125           return false;
19126       }
19127     }
19128 
19129     // If all the uses are load / store addresses, then don't do the
19130     // transformation.
19131     if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19132       for (SDNode *UserUser : User->users())
19133         if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19134           return false;
19135     }
19136   }
19137   return true;
19138 }
19139 
19140 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
19141                                          bool &IsMasked, SDValue &Ptr,
19142                                          SDValue &BasePtr, SDValue &Offset,
19143                                          ISD::MemIndexedMode &AM,
19144                                          SelectionDAG &DAG,
19145                                          const TargetLowering &TLI) {
19146   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19147                                 IsMasked, Ptr, TLI) ||
19148       Ptr->hasOneUse())
19149     return nullptr;
19150 
19151   // Try turning it into a post-indexed load / store except when:
19152   // 1) All uses are load / store ops that use it as base ptr (and
19153   //    it may be folded as an addressing mode).
19154   // 2) Op is not independent of N, i.e. Op is either a predecessor
19155   //    or a successor of N. Otherwise, folding Op would
19156   //    create a cycle.
19157   unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
19158   for (SDNode *Op : Ptr->users()) {
19159     // Check for #1.
19160     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19161       continue;
19162 
19163     // Check for #2.
19164     SmallPtrSet<const SDNode *, 32> Visited;
19165     SmallVector<const SDNode *, 8> Worklist;
19166     // Ptr is predecessor to both N and Op.
19167     Visited.insert(Ptr.getNode());
19168     Worklist.push_back(N);
19169     Worklist.push_back(Op);
19170     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19171         !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
19172       return Op;
19173   }
19174   return nullptr;
19175 }
19176 
19177 /// Try to combine a load/store with an add/sub of the base pointer node into
19178 /// a post-indexed load/store. The transformation effectively folds the
19179 /// add/subtract into the new indexed load/store, and all uses of the
19180 /// add/subtract are redirected to the new load/store.
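/// E.g. (a sketch; the exact folding is target-specific):
///   x  = load [p]        ; N
///   p2 = add p, Inc      ; Op
/// becomes
///   x, p2 = post-indexed load [p], Inc
/// with all uses of the add redirected to the updated-pointer result.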
19181 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
19182   if (Level < AfterLegalizeDAG)
19183     return false;
19184 
19185   bool IsLoad = true;
19186   bool IsMasked = false;
19187   SDValue Ptr;
19188   SDValue BasePtr;
19189   SDValue Offset;
19190   ISD::MemIndexedMode AM = ISD::UNINDEXED;
19191   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
19192                                          Offset, AM, DAG, TLI);
19193   if (!Op)
19194     return false;
19195 
19196   SDValue Result;
19197   if (!IsMasked)
19198     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19199                                          Offset, AM)
19200                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
19201                                           BasePtr, Offset, AM);
19202   else
19203     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
19204                                                BasePtr, Offset, AM)
19205                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
19206                                                 BasePtr, Offset, AM);
19207   ++PostIndexedNodes;
19208   ++NodesCombined;
19209   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19210              Result.dump(&DAG); dbgs() << '\n');
19211   WorklistRemover DeadNodes(*this);
19212   if (IsLoad) {
19213     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19214     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19215   } else {
19216     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19217   }
19218 
19219   // Finally, since the node is now dead, remove it from the graph.
19220   deleteAndRecombine(N);
19221 
19222   // Replace the uses of Op with uses of the updated base value.
19223   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
19224                                 Result.getValue(IsLoad ? 1 : 0));
19225   deleteAndRecombine(Op);
19226   return true;
19227 }
19228 
19229 /// Return the base-pointer arithmetic from an indexed \p LD.
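/// E.g., for a PRE_INC or POST_INC load with base BP and increment Inc this
/// returns (add BP, Inc); for the *_DEC modes it returns (sub BP, Inc).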
19230 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
19231   ISD::MemIndexedMode AM = LD->getAddressingMode();
19232   assert(AM != ISD::UNINDEXED);
19233   SDValue BP = LD->getOperand(1);
19234   SDValue Inc = LD->getOperand(2);
19235 
19236   // Some backends use TargetConstants for load offsets, but don't expect
19237   // TargetConstants in general ADD nodes. We can convert these constants into
19238   // regular Constants (if the constant is not opaque).
19239   assert((Inc.getOpcode() != ISD::TargetConstant ||
19240           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19241          "Cannot split out indexing using opaque target constants");
19242   if (Inc.getOpcode() == ISD::TargetConstant) {
19243     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
19244     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19245                           ConstInc->getValueType(0));
19246   }
19247 
19248   unsigned Opc =
19249       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
19250   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
19251 }
19252 
19253 static inline ElementCount numVectorEltsOrZero(EVT T) {
19254   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
19255 }
19256 
19257 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
19258   EVT STType = Val.getValueType();
19259   EVT STMemType = ST->getMemoryVT();
19260   if (STType == STMemType)
19261     return true;
19262   if (isTypeLegal(STMemType))
19263     return false; // fail.
19264   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
19265       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
19266     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
19267     return true;
19268   }
19269   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
19270       STType.isInteger() && STMemType.isInteger()) {
19271     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
19272     return true;
19273   }
19274   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
19275     Val = DAG.getBitcast(STMemType, Val);
19276     return true;
19277   }
19278   return false; // fail.
19279 }
19280 
19281 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
19282   EVT LDMemType = LD->getMemoryVT();
19283   EVT LDType = LD->getValueType(0);
19284   assert(Val.getValueType() == LDMemType &&
19285          "Attempting to extend value of non-matching type");
19286   if (LDType == LDMemType)
19287     return true;
19288   if (LDMemType.isInteger() && LDType.isInteger()) {
19289     switch (LD->getExtensionType()) {
19290     case ISD::NON_EXTLOAD:
19291       Val = DAG.getBitcast(LDType, Val);
19292       return true;
19293     case ISD::EXTLOAD:
19294       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
19295       return true;
19296     case ISD::SEXTLOAD:
19297       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
19298       return true;
19299     case ISD::ZEXTLOAD:
19300       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
19301       return true;
19302     }
19303   }
19304   return false;
19305 }
19306 
19307 StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
19308                                                 int64_t &Offset) {
19309   SDValue Chain = LD->getOperand(0);
19310 
19311   // Look through CALLSEQ_START.
19312   if (Chain.getOpcode() == ISD::CALLSEQ_START)
19313     Chain = Chain->getOperand(0);
19314 
19315   StoreSDNode *ST = nullptr;
19316   SmallVector<SDValue, 8> Aliases;
19317   if (Chain.getOpcode() == ISD::TokenFactor) {
19318     // Look for unique store within the TokenFactor.
19319     for (SDValue Op : Chain->ops()) {
19320       StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
19321       if (!Store)
19322         continue;
19323       BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
19324       BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
19325       if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
19326         continue;
19327       // Make sure the store is not aliased with any nodes in TokenFactor.
19328       GatherAllAliases(Store, Chain, Aliases);
19329       if (Aliases.empty() ||
19330           (Aliases.size() == 1 && Aliases.front().getNode() == Store))
19331         ST = Store;
19332       break;
19333     }
19334   } else {
19335     StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
19336     if (Store) {
19337       BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
19338       BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
19339       if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
19340         ST = Store;
19341     }
19342   }
19343 
19344   return ST;
19345 }
19346 
19347 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
19348   if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
19349     return SDValue();
19350   SDValue Chain = LD->getOperand(0);
19351   int64_t Offset;
19352 
19353   StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
19354   // TODO: Relax this restriction for unordered atomics (see D66309)
19355   if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
19356     return SDValue();
19357 
19358   EVT LDType = LD->getValueType(0);
19359   EVT LDMemType = LD->getMemoryVT();
19360   EVT STMemType = ST->getMemoryVT();
19361   EVT STType = ST->getValue().getValueType();
19362 
19363   // There are two cases to consider here:
19364   //  1. The store is fixed width and the load is scalable. In this case we
19365   //     don't know at compile time if the store completely envelops the load
19366   //     so we abandon the optimisation.
19367   //  2. The store is scalable and the load is fixed width. We could
19368   //     potentially support a limited number of cases here, but there has been
19369   //     no cost-benefit analysis to prove it's worth it.
19370   bool LdStScalable = LDMemType.isScalableVT();
19371   if (LdStScalable != STMemType.isScalableVT())
19372     return SDValue();
19373 
19374   // If we are dealing with scalable vectors on a big endian platform the
19375   // calculation of offsets below becomes trickier, since we do not know at
19376   // compile time the absolute size of the vector. Until we've done more
19377   // analysis on big-endian platforms it seems better to bail out for now.
19378   if (LdStScalable && DAG.getDataLayout().isBigEndian())
19379     return SDValue();
19380 
19381   // Normalize for endianness. After this, Offset=0 will denote that the least
19382   // significant bit in the loaded value maps to the least significant bit in
19383   // the stored value. With Offset=n (for n > 0) the loaded value starts at the
19384   // n:th least significant byte of the stored value.
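  // E.g., when an i16 load matches an i32 store at offset 0 on a big-endian
  // target, the normalized Offset becomes (4 - 2) - 0 = 2: the loaded bytes are
  // the two most significant bytes of the stored value.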
19385   int64_t OrigOffset = Offset;
19386   if (DAG.getDataLayout().isBigEndian())
19387     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
19388               (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
19389                  8 -
19390              Offset;
19391 
19392   // Check that the stored value covers all bits that are loaded.
19393   bool STCoversLD;
19394 
19395   TypeSize LdMemSize = LDMemType.getSizeInBits();
19396   TypeSize StMemSize = STMemType.getSizeInBits();
19397   if (LdStScalable)
19398     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
19399   else
19400     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
19401                                    StMemSize.getFixedValue());
19402 
19403   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
19404     if (LD->isIndexed()) {
19405       // Cannot handle opaque target constants and we must respect the user's
19406       // request not to split indexes from loads.
19407       if (!canSplitIdx(LD))
19408         return SDValue();
19409       SDValue Idx = SplitIndexingFromLoad(LD);
19410       SDValue Ops[] = {Val, Idx, Chain};
19411       return CombineTo(LD, Ops, 3);
19412     }
19413     return CombineTo(LD, Val, Chain);
19414   };
19415 
19416   if (!STCoversLD)
19417     return SDValue();
19418 
19419   // Memory as copy space (potentially masked).
19420   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
19421     // Simple case: Direct non-truncating forwarding
19422     if (LDType.getSizeInBits() == LdMemSize)
19423       return ReplaceLd(LD, ST->getValue(), Chain);
19424     // Can we model the truncate and extension with an and mask?
19425     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
19426         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
19427       // Mask to size of LDMemType
19428       auto Mask =
19429           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
19430                                                StMemSize.getFixedValue()),
19431                           SDLoc(ST), STType);
19432       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
19433       return ReplaceLd(LD, Val, Chain);
19434     }
19435   }
19436 
19437   // Handle some cases for big-endian that would have Offset 0 and be handled
19438   // for little-endian.
19439   SDValue Val = ST->getValue();
19440   if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
19441     if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
19442         !LDType.isVector() && isTypeLegal(STType) &&
19443         TLI.isOperationLegal(ISD::SRL, STType)) {
19444       Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
19445                         DAG.getConstant(Offset * 8, SDLoc(LD), STType));
19446       Offset = 0;
19447     }
19448   }
19449 
19450   // TODO: Deal with nonzero offset.
19451   if (LD->getBasePtr().isUndef() || Offset != 0)
19452     return SDValue();
19453   // Model necessary truncations / extensions.
19454   // Truncate the value to the stored memory size.
19455   do {
19456     if (!getTruncatedStoreValue(ST, Val))
19457       break;
19458     if (!isTypeLegal(LDMemType))
19459       break;
19460     if (STMemType != LDMemType) {
19461       // TODO: Support vectors? This requires extract_subvector/bitcast.
19462       if (!STMemType.isVector() && !LDMemType.isVector() &&
19463           STMemType.isInteger() && LDMemType.isInteger())
19464         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
19465       else
19466         break;
19467     }
19468     if (!extendLoadedValueToExtension(LD, Val))
19469       break;
19470     return ReplaceLd(LD, Val, Chain);
19471   } while (false);
19472 
19473   // On failure, cleanup dead nodes we may have created.
19474   if (Val->use_empty())
19475     deleteAndRecombine(Val.getNode());
19476   return SDValue();
19477 }
19478 
19479 SDValue DAGCombiner::visitLOAD(SDNode *N) {
19480   LoadSDNode *LD  = cast<LoadSDNode>(N);
19481   SDValue Chain = LD->getChain();
19482   SDValue Ptr   = LD->getBasePtr();
19483 
19484   // If load is not volatile and there are no uses of the loaded value (and
19485   // the updated indexed value in case of indexed loads), change uses of the
19486   // chain value into uses of the chain input (i.e. delete the dead load).
19487   // TODO: Allow this for unordered atomics (see D66309)
19488   if (LD->isSimple()) {
19489     if (N->getValueType(1) == MVT::Other) {
19490       // Unindexed loads.
19491       if (!N->hasAnyUseOfValue(0)) {
19492         // It's not safe to use the two value CombineTo variant here. e.g.
19493         // v1, chain2 = load chain1, loc
19494         // v2, chain3 = load chain2, loc
19495         // v3         = add v2, c
19496         // Now we replace use of chain2 with chain1.  This makes the second load
19497         // isomorphic to the one we are deleting, and thus makes this load live.
19498         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19499                    dbgs() << "\nWith chain: "; Chain.dump(&DAG);
19500                    dbgs() << "\n");
19501         WorklistRemover DeadNodes(*this);
19502         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19503         AddUsersToWorklist(Chain.getNode());
19504         if (N->use_empty())
19505           deleteAndRecombine(N);
19506 
19507         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
19508       }
19509     } else {
19510       // Indexed loads.
19511       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19512 
19513       // If this load has an opaque TargetConstant offset, then we cannot split
19514       // the indexing into an add/sub directly (that TargetConstant may not be
19515       // valid for a different type of node, and we cannot convert an opaque
19516       // target constant into a regular constant).
19517       bool CanSplitIdx = canSplitIdx(LD);
19518 
19519       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19520         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19521         SDValue Index;
19522         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19523           Index = SplitIndexingFromLoad(LD);
19524           // Try to fold the base pointer arithmetic into subsequent loads and
19525           // stores.
19526           AddUsersToWorklist(N);
19527         } else
19528           Index = DAG.getUNDEF(N->getValueType(1));
19529         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19530                    dbgs() << "\nWith: "; Undef.dump(&DAG);
19531                    dbgs() << " and 2 other values\n");
19532         WorklistRemover DeadNodes(*this);
19533         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
19534         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
19535         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
19536         deleteAndRecombine(N);
19537         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
19538       }
19539     }
19540   }
19541 
19542   // If this load is directly stored, replace the load value with the stored
19543   // value.
19544   if (auto V = ForwardStoreValueToDirectLoad(LD))
19545     return V;
19546 
19547   // Try to infer better alignment information than the load already has.
19548   if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19549       !LD->isAtomic()) {
19550     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19551       if (*Alignment > LD->getAlign() &&
19552           isAligned(*Alignment, LD->getSrcValueOffset())) {
19553         SDValue NewLoad = DAG.getExtLoad(
19554             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19555             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19556             LD->getMemOperand()->getFlags(), LD->getAAInfo());
19557         // NewLoad will always be N as we are only refining the alignment
19558         assert(NewLoad.getNode() == N);
19559         (void)NewLoad;
19560       }
19561     }
19562   }
19563 
19564   if (LD->isUnindexed()) {
19565     // Walk up chain skipping non-aliasing memory nodes.
19566     SDValue BetterChain = FindBetterChain(LD, Chain);
19567 
19568     // If there is a better chain.
19569     if (Chain != BetterChain) {
19570       SDValue ReplLoad;
19571 
19572       // Replace the chain to avoid the dependency.
19573       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19574         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19575                                BetterChain, Ptr, LD->getMemOperand());
19576       } else {
19577         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19578                                   LD->getValueType(0),
19579                                   BetterChain, Ptr, LD->getMemoryVT(),
19580                                   LD->getMemOperand());
19581       }
19582 
19583       // Create token factor to keep old chain connected.
19584       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19585                                   MVT::Other, Chain, ReplLoad.getValue(1));
19586 
19587       // Replace uses with load result and token factor
19588       return CombineTo(N, ReplLoad.getValue(0), Token);
19589     }
19590   }
19591 
19592   // Try transforming N to an indexed load.
19593   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19594     return SDValue(N, 0);
19595 
19596   // Try to slice up N into more direct loads if the slices are mapped to
19597   // different register banks or pairing can take place.
19598   if (SliceUpLoad(N))
19599     return SDValue(N, 0);
19600 
19601   return SDValue();
19602 }
19603 
19604 namespace {
19605 
19606 /// Helper structure used to slice a load into smaller loads.
19607 /// Basically a slice is obtained from the following sequence:
19608 /// Origin = load Ty1, Base
19609 /// Shift = srl Ty1 Origin, CstTy Amount
19610 /// Inst = trunc Shift to Ty2
19611 ///
19612 /// Then, it will be rewritten into:
19613 /// Slice = load SliceTy, Base + SliceOffset
19614 /// [Inst = zext Slice to Ty2], only if SliceTy != Ty2
19615 ///
19616 /// SliceTy is deduced from the number of bits that are actually used to
19617 /// build Inst.
19618 struct LoadedSlice {
19619   /// Helper structure used to compute the cost of a slice.
19620   struct Cost {
19621     /// Are we optimizing for code size.
19622     bool ForCodeSize = false;
19623 
19624     /// Various costs.
19625     unsigned Loads = 0;
19626     unsigned Truncates = 0;
19627     unsigned CrossRegisterBanksCopies = 0;
19628     unsigned ZExts = 0;
19629     unsigned Shift = 0;
19630 
19631     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19632 
19633     /// Get the cost of one isolated slice.
19634     Cost(const LoadedSlice &LS, bool ForCodeSize)
19635         : ForCodeSize(ForCodeSize), Loads(1) {
19636       EVT TruncType = LS.Inst->getValueType(0);
19637       EVT LoadedType = LS.getLoadedType();
19638       if (TruncType != LoadedType &&
19639           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19640         ZExts = 1;
19641     }
19642 
19643     /// Account for slicing gain in the current cost.
19644     /// Slicing provides a few gains, like removing a shift or a
19645     /// truncate. This method allows growing the cost of the original
19646     /// load with the gain from this slice.
19647     void addSliceGain(const LoadedSlice &LS) {
19648       // Each slice saves a truncate.
19649       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19650       if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19651         ++Truncates;
19652       // If there is a shift amount, this slice gets rid of it.
19653       if (LS.Shift)
19654         ++Shift;
19655       // If this slice can merge a cross register bank copy, account for it.
19656       if (LS.canMergeExpensiveCrossRegisterBankCopy())
19657         ++CrossRegisterBanksCopies;
19658     }
19659 
19660     Cost &operator+=(const Cost &RHS) {
19661       Loads += RHS.Loads;
19662       Truncates += RHS.Truncates;
19663       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19664       ZExts += RHS.ZExts;
19665       Shift += RHS.Shift;
19666       return *this;
19667     }
19668 
19669     bool operator==(const Cost &RHS) const {
19670       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19671              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19672              ZExts == RHS.ZExts && Shift == RHS.Shift;
19673     }
19674 
19675     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19676 
19677     bool operator<(const Cost &RHS) const {
19678       // Assume cross-register-bank copies are as expensive as loads.
19679       // FIXME: Do we want some more target hooks?
19680       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19681       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19682       // Unless we are optimizing for code size, consider the
19683       // expensive operation first.
19684       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19685         return ExpensiveOpsLHS < ExpensiveOpsRHS;
19686       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19687              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19688     }
19689 
19690     bool operator>(const Cost &RHS) const { return RHS < *this; }
19691 
19692     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19693 
19694     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19695   };
19696 
19697   // The last instruction that represents the slice. This should be a
19698   // truncate instruction.
19699   SDNode *Inst;
19700 
19701   // The original load instruction.
19702   LoadSDNode *Origin;
19703 
19704   // The right shift amount in bits from the original load.
19705   unsigned Shift;
19706 
19707   // The DAG from which Origin comes.
19708   // This is used to get some contextual information about legal types, etc.
19709   SelectionDAG *DAG;
19710 
19711   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19712               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19713       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19714 
19715   /// Get the bits used in a chunk of bits \p BitWidth large.
19716   /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
19717   ///         unused bits set to 0.
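  /// E.g., for an i32 Origin and a slice Inst = (trunc (srl Origin, 16) to i8),
  /// the used bits are 0x00FF0000.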
19718   APInt getUsedBits() const {
19719     // Reproduce the trunc(lshr) sequence:
19720     // - Start from the truncated value.
19721     // - Zero extend to the desired bit width.
19722     // - Shift left.
19723     assert(Origin && "No original load to compare against.");
19724     unsigned BitWidth = Origin->getValueSizeInBits(0);
19725     assert(Inst && "This slice is not bound to an instruction");
19726     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19727            "Extracted slice is bigger than the whole type!");
19728     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19729     UsedBits.setAllBits();
19730     UsedBits = UsedBits.zext(BitWidth);
19731     UsedBits <<= Shift;
19732     return UsedBits;
19733   }
19734 
19735   /// Get the size of the slice to be loaded in bytes.
19736   unsigned getLoadedSize() const {
19737     unsigned SliceSize = getUsedBits().popcount();
19738     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19739     return SliceSize / 8;
19740   }
19741 
19742   /// Get the type that will be loaded for this slice.
19743   /// Note: This may not be the final type for the slice.
19744   EVT getLoadedType() const {
19745     assert(DAG && "Missing context");
19746     LLVMContext &Ctxt = *DAG->getContext();
19747     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19748   }
19749 
19750   /// Get the alignment of the load used for this slice.
19751   Align getAlign() const {
19752     Align Alignment = Origin->getAlign();
19753     uint64_t Offset = getOffsetFromBase();
19754     if (Offset != 0)
19755       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19756     return Alignment;
19757   }
19758 
19759   /// Check if this slice can be rewritten with legal operations.
19760   bool isLegal() const {
19761     // An invalid slice is not legal.
19762     if (!Origin || !Inst || !DAG)
19763       return false;
19764 
19765     // Offsets are for indexed loads only; we do not handle that.
19766     if (!Origin->getOffset().isUndef())
19767       return false;
19768 
19769     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19770 
19771     // Check that the type is legal.
19772     EVT SliceType = getLoadedType();
19773     if (!TLI.isTypeLegal(SliceType))
19774       return false;
19775 
19776     // Check that the load is legal for this type.
19777     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19778       return false;
19779 
19780     // Check that the offset can be computed.
19781     // 1. Check its type.
19782     EVT PtrType = Origin->getBasePtr().getValueType();
19783     if (PtrType == MVT::Untyped || PtrType.isExtended())
19784       return false;
19785 
19786     // 2. Check that it fits in the immediate.
19787     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19788       return false;
19789 
19790     // 3. Check that the computation is legal.
19791     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19792       return false;
19793 
19794     // Check that the zext is legal if it needs one.
19795     EVT TruncateType = Inst->getValueType(0);
19796     if (TruncateType != SliceType &&
19797         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19798       return false;
19799 
19800     return true;
19801   }
19802 
19803   /// Get the offset in bytes of this slice in the original chunk of
19804   /// bits.
19805   /// \pre DAG != nullptr.
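  /// E.g., with an i64 Origin, Shift = 16, and a 2-byte slice, the offset is
  /// 16 / 8 = 2 on a little-endian target and 8 - 2 - 2 = 4 on a big-endian one.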
19806   uint64_t getOffsetFromBase() const {
19807     assert(DAG && "Missing context.");
19808     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19809     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19810     uint64_t Offset = Shift / 8;
19811     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19812     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19813            "The size of the original loaded type is not a multiple of a"
19814            " byte.");
19815     // If Offset is bigger than TySizeInBytes, it means we are loading all
19816     // zeros. This should have been optimized before in the process.
19817     assert(TySizeInBytes > Offset &&
19818            "Invalid shift amount for given loaded size");
19819     if (IsBigEndian)
19820       Offset = TySizeInBytes - Offset - getLoadedSize();
19821     return Offset;
19822   }
19823 
19824   /// Generate the sequence of instructions to load the slice
19825   /// represented by this object and redirect the uses of this slice to
19826   /// this new sequence of instructions.
19827   /// \pre this->Inst && this->Origin are valid Instructions and this
19828   /// object passed the legal check: LoadedSlice::isLegal returned true.
19829   /// \return The last instruction of the sequence used to load the slice.
19830   SDValue loadSlice() const {
19831     assert(Inst && Origin && "Unable to replace a non-existing slice.");
19832     const SDValue &OldBaseAddr = Origin->getBasePtr();
19833     SDValue BaseAddr = OldBaseAddr;
19834     // Get the offset in that chunk of bytes w.r.t. the endianness.
19835     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19836     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19837     if (Offset) {
19838       // BaseAddr = BaseAddr + Offset.
19839       EVT ArithType = BaseAddr.getValueType();
19840       SDLoc DL(Origin);
19841       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19842                               DAG->getConstant(Offset, DL, ArithType));
19843     }
19844 
19845     // Create the type of the loaded slice according to its size.
19846     EVT SliceType = getLoadedType();
19847 
19848     // Create the load for the slice.
19849     SDValue LastInst =
19850         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19851                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19852                      Origin->getMemOperand()->getFlags());
19853     // If the final type is not the same as the loaded type, this means that
19854     // we have to pad with zero. Create a zero extend for that.
19855     EVT FinalType = Inst->getValueType(0);
19856     if (SliceType != FinalType)
19857       LastInst =
19858           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19859     return LastInst;
19860   }
19861 
19862   /// Check if this slice can be merged with an expensive cross register
19863   /// bank copy. E.g.,
19864   /// i = load i32
19865   /// f = bitcast i32 i to float
19866   bool canMergeExpensiveCrossRegisterBankCopy() const {
19867     if (!Inst || !Inst->hasOneUse())
19868       return false;
19869     SDNode *User = *Inst->user_begin();
19870     if (User->getOpcode() != ISD::BITCAST)
19871       return false;
19872     assert(DAG && "Missing context");
19873     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19874     EVT ResVT = User->getValueType(0);
19875     const TargetRegisterClass *ResRC =
19876         TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
19877     const TargetRegisterClass *ArgRC =
19878         TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
19879                            User->getOperand(0)->isDivergent());
19880     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19881       return false;
19882 
19883     // At this point, we know that we perform a cross-register-bank copy.
19884     // Check if it is expensive.
19885     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19886     // Assume bitcasts are cheap, unless both register classes do not
19887     // explicitly share a common sub class.
19888     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19889       return false;
19890 
19891     // Check if it will be merged with the load.
19892     // 1. Check the alignment / fast memory access constraint.
19893     unsigned IsFast = 0;
19894     if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19895                                 Origin->getAddressSpace(), getAlign(),
19896                                 Origin->getMemOperand()->getFlags(), &IsFast) ||
19897         !IsFast)
19898       return false;
19899 
19900     // 2. Check that the load is a legal operation for that type.
19901     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19902       return false;
19903 
19904     // 3. Check that we do not have a zext in the way.
19905     if (Inst->getValueType(0) != getLoadedType())
19906       return false;
19907 
19908     return true;
19909   }
19910 };
19911 
19912 } // end anonymous namespace
19913 
19914 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
19915 /// \p UsedBits looks like 0..0 1..1 0..0.
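/// E.g., 0x00FF0000 is dense, while 0x00FF00FF is not.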
19916 static bool areUsedBitsDense(const APInt &UsedBits) {
19917   // If all the bits are one, this is dense!
19918   if (UsedBits.isAllOnes())
19919     return true;
19920 
19921   // Get rid of the unused bits on the right.
19922   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19923   // Get rid of the unused bits on the left.
19924   if (NarrowedUsedBits.countl_zero())
19925     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19926   // Check that the chunk of bits is completely used.
19927   return NarrowedUsedBits.isAllOnes();
19928 }
19929 
19930 /// Check whether or not \p First and \p Second are next to each other
19931 /// in memory. This means that there is no hole between the bits loaded
19932 /// by \p First and the bits loaded by \p Second.
19933 static bool areSlicesNextToEachOther(const LoadedSlice &First,
19934                                      const LoadedSlice &Second) {
19935   assert(First.Origin == Second.Origin && First.Origin &&
19936          "Unable to match different memory origins.");
19937   APInt UsedBits = First.getUsedBits();
19938   assert((UsedBits & Second.getUsedBits()) == 0 &&
19939          "Slices are not supposed to overlap.");
19940   UsedBits |= Second.getUsedBits();
19941   return areUsedBitsDense(UsedBits);
19942 }
19943 
19944 /// Adjust the \p GlobalLSCost according to the target
19945 /// pairing capabilities and the layout of the slices.
19946 /// \pre \p GlobalLSCost should account for at least as many loads as
19947 /// there are in the slices in \p LoadedSlices.
19948 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19949                                  LoadedSlice::Cost &GlobalLSCost) {
19950   unsigned NumberOfSlices = LoadedSlices.size();
19951   // If there are fewer than 2 elements, no pairing is possible.
19952   if (NumberOfSlices < 2)
19953     return;
19954 
19955   // Sort the slices so that elements that are likely to be next to each
19956   // other in memory are next to each other in the list.
19957   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19958     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19959     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19960   });
19961   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19962   // First (resp. Second) is the first (resp. second) potential candidate
19963   // to be placed in a paired load.
19964   const LoadedSlice *First = nullptr;
19965   const LoadedSlice *Second = nullptr;
19966   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19967                 // Set the beginning of the pair.
19968                                                            First = Second) {
19969     Second = &LoadedSlices[CurrSlice];
19970 
19971     // If First is NULL, it means we start a new pair.
19972     // Get to the next slice.
19973     if (!First)
19974       continue;
19975 
19976     EVT LoadedType = First->getLoadedType();
19977 
19978     // If the types of the slices are different, we cannot pair them.
19979     if (LoadedType != Second->getLoadedType())
19980       continue;
19981 
19982     // Check if the target supplies paired loads for this type.
19983     Align RequiredAlignment;
19984     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19985       // Move to the next pair; this type is hopeless.
19986       Second = nullptr;
19987       continue;
19988     }
19989     // Check if we meet the alignment requirement.
19990     if (First->getAlign() < RequiredAlignment)
19991       continue;
19992 
19993     // Check that both loads are next to each other in memory.
19994     if (!areSlicesNextToEachOther(*First, *Second))
19995       continue;
19996 
19997     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19998     --GlobalLSCost.Loads;
19999     // Move to the next pair.
20000     Second = nullptr;
20001   }
20002 }
20003 
20004 /// Check the profitability of all involved LoadedSlices.
20005 /// Currently, it is considered profitable if there are exactly two
20006 /// involved slices (1) which are (2) next to each other in memory, and
20007 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20008 ///
20009 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
20010 /// the elements themselves.
20011 ///
20012 /// FIXME: When the cost model is mature enough, we can relax
20013 /// constraints (1) and (2).
20014 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20015                                 const APInt &UsedBits, bool ForCodeSize) {
20016   unsigned NumberOfSlices = LoadedSlices.size();
20017   if (StressLoadSlicing)
20018     return NumberOfSlices > 1;
20019 
20020   // Check (1).
20021   if (NumberOfSlices != 2)
20022     return false;
20023 
20024   // Check (2).
20025   if (!areUsedBitsDense(UsedBits))
20026     return false;
20027 
20028   // Check (3).
20029   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20030   // The original code has one big load.
20031   OrigCost.Loads = 1;
20032   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20033     const LoadedSlice &LS = LoadedSlices[CurrSlice];
20034     // Accumulate the cost of all the slices.
20035     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20036     GlobalSlicingCost += SliceCost;
20037 
20038     // Account, as a cost of the original configuration, for the gain obtained
20039     // with the current slices.
20040     OrigCost.addSliceGain(LS);
20041   }
20042 
20043   // If the target supports paired load, adjust the cost accordingly.
20044   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20045   return OrigCost > GlobalSlicingCost;
20046 }
20047 
20048 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
20049 /// operations, split it into the various pieces being extracted.
20050 ///
20051 /// This sort of thing is introduced by SROA.
20052 /// This slicing takes care not to insert overlapping loads.
20053 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
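/// E.g. (a sketch, little endian):
///   v  = load i32, p
///   lo = trunc v to i16
///   hi = trunc (srl v, 16) to i16
/// may become two independent i16 loads, one at p and one at p + 2.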
20054 bool DAGCombiner::SliceUpLoad(SDNode *N) {
20055   if (Level < AfterLegalizeDAG)
20056     return false;
20057 
20058   LoadSDNode *LD = cast<LoadSDNode>(N);
20059   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20060       !LD->getValueType(0).isInteger())
20061     return false;
20062 
20063   // The algorithm to split up a load of a scalable vector into individual
20064   // elements currently requires knowing the length of the loaded type,
20065   // so will need adjusting to work on scalable vectors.
20066   if (LD->getValueType(0).isScalableVector())
20067     return false;
20068 
20069   // Keep track of already used bits to detect overlapping values.
20070   // In that case, we will just abort the transformation.
20071   APInt UsedBits(LD->getValueSizeInBits(0), 0);
20072 
20073   SmallVector<LoadedSlice, 4> LoadedSlices;
20074 
20075   // Check if this load is used as several smaller chunks of bits.
20076   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20077   // of computation for each trunc.
20078   for (SDUse &U : LD->uses()) {
20079     // Skip the uses of the chain.
20080     if (U.getResNo() != 0)
20081       continue;
20082 
20083     SDNode *User = U.getUser();
20084     unsigned Shift = 0;
20085 
20086     // Check if this is a trunc(lshr).
20087     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20088         isa<ConstantSDNode>(User->getOperand(1))) {
20089       Shift = User->getConstantOperandVal(1);
20090       User = *User->user_begin();
20091     }
20092 
20093     // At this point, User is a truncate iff we encountered trunc or
20094     // trunc(lshr).
20095     if (User->getOpcode() != ISD::TRUNCATE)
20096       return false;
20097 
20098     // The width of the type must be a power of 2 and at least 8 bits.
20099     // Otherwise the load cannot be represented in LLVM IR.
20100     // Moreover, if we shifted by a non-multiple of 8 bits, the slice
20101     // will span several bytes. We do not support that.
20102     unsigned Width = User->getValueSizeInBits(0);
20103     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20104       return false;
20105 
20106     // Build the slice for this chain of computations.
20107     LoadedSlice LS(User, LD, Shift, &DAG);
20108     APInt CurrentUsedBits = LS.getUsedBits();
20109 
20110     // Check if this slice overlaps with another.
20111     if ((CurrentUsedBits & UsedBits) != 0)
20112       return false;
20113     // Update the bits used globally.
20114     UsedBits |= CurrentUsedBits;
20115 
20116     // Check if the new slice would be legal.
20117     if (!LS.isLegal())
20118       return false;
20119 
20120     // Record the slice.
20121     LoadedSlices.push_back(LS);
20122   }
20123 
20124   // Abort slicing if it does not seem to be profitable.
20125   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20126     return false;
20127 
20128   ++SlicedLoads;
20129 
20130   // Rewrite each chain to use an independent load.
20131   // By construction, each chain can be represented by a unique load.
20132 
20133   // Prepare the argument for the new token factor for all the slices.
20134   SmallVector<SDValue, 8> ArgChains;
20135   for (const LoadedSlice &LS : LoadedSlices) {
20136     SDValue SliceInst = LS.loadSlice();
20137     CombineTo(LS.Inst, SliceInst, true);
20138     if (SliceInst.getOpcode() != ISD::LOAD)
20139       SliceInst = SliceInst.getOperand(0);
20140     assert(SliceInst->getOpcode() == ISD::LOAD &&
20141            "It takes more than a zext to get to the loaded slice!!");
20142     ArgChains.push_back(SliceInst.getValue(1));
20143   }
20144 
20145   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20146                               ArgChains);
20147   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20148   AddToWorklist(Chain.getNode());
20149   return true;
20150 }
20151 
20152 /// Check to see if V is (and (load ptr), imm), where the load has
20153 /// specific bytes cleared out.  If so, return the byte size being masked out
20154 /// and the shift amount.
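/// E.g., for i32 (and (load ptr), 0xFFFF00FF), the second least significant
/// byte of the loaded value is cleared, so this returns {1, 1}: one byte
/// masked out, at a shift of one byte.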
20155 static std::pair<unsigned, unsigned>
20156 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
20157   std::pair<unsigned, unsigned> Result(0, 0);
20158 
20159   // Check for the structure we're looking for.
20160   if (V->getOpcode() != ISD::AND ||
20161       !isa<ConstantSDNode>(V->getOperand(1)) ||
20162       !ISD::isNormalLoad(V->getOperand(0).getNode()))
20163     return Result;
20164 
20165   // Check the chain and pointer.
20166   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20167   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
20168 
20169   // This only handles simple types.
20170   if (V.getValueType() != MVT::i16 &&
20171       V.getValueType() != MVT::i32 &&
20172       V.getValueType() != MVT::i64)
20173     return Result;
20174 
20175   // Check the constant mask.  Invert it so that the bits being masked out are
20176   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
20177   // follow the sign bit for uniformity.
20178   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20179   unsigned NotMaskLZ = llvm::countl_zero(NotMask);
20180   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
20181   unsigned NotMaskTZ = llvm::countr_zero(NotMask);
20182   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
20183   if (NotMaskLZ == 64) return Result;  // All zero mask.
20184 
20185   // See if we have a continuous run of bits.  If so, we have 0*1+0*
20186   if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
20187     return Result;
20188 
20189   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
20190   if (V.getValueType() != MVT::i64 && NotMaskLZ)
20191     NotMaskLZ -= 64-V.getValueSizeInBits();
20192 
20193   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20194   switch (MaskedBytes) {
20195   case 1:
20196   case 2:
20197   case 4: break;
20198   default: return Result; // All one mask, or 5-byte mask.
20199   }
20200 
20201   // Verify that the masked region starts at a multiple of the mask width so
20202   // that the access is aligned the same as the access width.
20203   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
20204 
20205   // For narrowing to be valid, the load must be the memory operation
20206   // immediately preceding the store.
20207   if (LD == Chain.getNode())
20208     ; // ok.
20209   else if (Chain->getOpcode() == ISD::TokenFactor &&
20210            SDValue(LD, 1).hasOneUse()) {
20211     // LD has only 1 chain use, so there are no indirect dependencies.
20212     if (!LD->isOperandOf(Chain.getNode()))
20213       return Result;
20214   } else
20215     return Result; // Fail.
20216 
20217   Result.first = MaskedBytes;
20218   Result.second = NotMaskTZ/8;
20219   return Result;
20220 }
20221 
20222 /// Check to see if IVal is something that provides a value as specified by
20223 /// MaskInfo. If so, replace the specified store with a narrower store of
20224 /// truncated IVal.
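/// E.g. (a sketch, little endian): with MaskInfo = {1, 1}, a store of
/// (or (and (load p), 0xFFFF00FF), IVal) where IVal is known zero outside
/// bits [8, 16) can be replaced by an i8 store of (trunc (srl IVal, 8))
/// at address p + 1.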
20225 static SDValue
20226 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
20227                                 SDValue IVal, StoreSDNode *St,
20228                                 DAGCombiner *DC) {
20229   unsigned NumBytes = MaskInfo.first;
20230   unsigned ByteShift = MaskInfo.second;
20231   SelectionDAG &DAG = DC->getDAG();
20232 
20233   // Check to see if IVal is all zeros in the part being masked in by the 'or'
20234   // that uses this.  If not, this is not a replacement.
20235   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
20236                                   ByteShift*8, (ByteShift+NumBytes)*8);
20237   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
20238 
20239   // Check that it is legal on the target to do this.  It is legal if the new
20240   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
20241   // legalization. If the source type is legal, but the store type isn't, see
20242   // if we can use a truncating store.
20243   MVT VT = MVT::getIntegerVT(NumBytes * 8);
20244   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20245   bool UseTruncStore;
20246   if (DC->isTypeLegal(VT))
20247     UseTruncStore = false;
20248   else if (TLI.isTypeLegal(IVal.getValueType()) &&
20249            TLI.isTruncStoreLegal(IVal.getValueType(), VT))
20250     UseTruncStore = true;
20251   else
20252     return SDValue();
20253 
20254   // Can't do this for indexed stores.
20255   if (St->isIndexed())
20256     return SDValue();
20257 
20258   // Check that the target doesn't think this is a bad idea.
20259   if (St->getMemOperand() &&
20260       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
20261                               *St->getMemOperand()))
20262     return SDValue();
20263 
20264   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
20265   // shifted by ByteShift and truncated down to NumBytes.
20266   if (ByteShift) {
20267     SDLoc DL(IVal);
20268     IVal = DAG.getNode(
20269         ISD::SRL, DL, IVal.getValueType(), IVal,
20270         DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
20271   }
20272 
20273   // Figure out the offset for the store and the alignment of the access.
20274   unsigned StOffset;
20275   if (DAG.getDataLayout().isLittleEndian())
20276     StOffset = ByteShift;
20277   else
20278     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
20279 
20280   SDValue Ptr = St->getBasePtr();
20281   if (StOffset) {
20282     SDLoc DL(IVal);
20283     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
20284   }
20285 
20286   ++OpsNarrowed;
20287   if (UseTruncStore)
20288     return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
20289                              St->getPointerInfo().getWithOffset(StOffset),
20290                              VT, St->getOriginalAlign());
20291 
20292   // Truncate down to the new size.
20293   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
20294 
20295   return DAG
20296       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
20297                 St->getPointerInfo().getWithOffset(StOffset),
20298                 St->getOriginalAlign());
20299 }
20300 
20301 /// Look for a sequence of load / op / store where op is one of 'or', 'xor',
20302 /// and 'and' of immediates. If 'op' only touches some of the loaded bits, try
20303 /// narrowing the load and store if it would end up being a win for performance
20304 /// or code size.
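/// E.g. (a sketch, little endian): i32 (store (xor (load p), 0xFF00), p) only
/// touches the second byte, so it may be narrowed to
/// i8 (store (xor (load p+1), 0xFF), p+1) when the target deems it profitable.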
20305 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
20306   StoreSDNode *ST  = cast<StoreSDNode>(N);
20307   if (!ST->isSimple())
20308     return SDValue();
20309 
20310   SDValue Chain = ST->getChain();
20311   SDValue Value = ST->getValue();
20312   SDValue Ptr   = ST->getBasePtr();
20313   EVT VT = Value.getValueType();
20314 
20315   if (ST->isTruncatingStore() || VT.isVector())
20316     return SDValue();
20317 
20318   unsigned Opc = Value.getOpcode();
20319 
20320   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
20321       !Value.hasOneUse())
20322     return SDValue();
20323 
20324   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
20325   // is a byte mask indicating a consecutive number of bytes, check to see if
20326   // Y is known to provide just those bytes.  If so, we try to replace the
20327   // load + replace + store sequence with a single (narrower) store, which makes
20328   // the load dead.
20329   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
20330     std::pair<unsigned, unsigned> MaskedLoad;
20331     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
20332     if (MaskedLoad.first)
20333       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
20334                                                   Value.getOperand(1), ST,this))
20335         return NewST;
20336 
20337     // Or is commutative, so try swapping X and Y.
20338     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
20339     if (MaskedLoad.first)
20340       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
20341                                                   Value.getOperand(0), ST,this))
20342         return NewST;
20343   }
20344 
20345   if (!EnableReduceLoadOpStoreWidth)
20346     return SDValue();
20347 
20348   if (Value.getOperand(1).getOpcode() != ISD::Constant)
20349     return SDValue();
20350 
20351   SDValue N0 = Value.getOperand(0);
20352   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
20353       Chain == SDValue(N0.getNode(), 1)) {
20354     LoadSDNode *LD = cast<LoadSDNode>(N0);
20355     if (LD->getBasePtr() != Ptr ||
20356         LD->getPointerInfo().getAddrSpace() !=
20357         ST->getPointerInfo().getAddrSpace())
20358       return SDValue();
20359 
20360     // Find the type NewVT to narrow the load / op / store to.
20361     SDValue N1 = Value.getOperand(1);
20362     unsigned BitWidth = N1.getValueSizeInBits();
20363     APInt Imm = N1->getAsAPIntVal();
20364     if (Opc == ISD::AND)
20365       Imm.flipAllBits();
20366     if (Imm == 0 || Imm.isAllOnes())
20367       return SDValue();
20368     // Find the least/most significant bits that need to be part of the
20369     // narrowed operation. We assume the target will need to address/access
20370     // full bytes, so we make sure to align LSB and MSB at byte boundaries.
20371     unsigned BitsPerByteMask = 7u;
20372     unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
20373     unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
20374     unsigned NewBW = NextPowerOf2(MSB - LSB);
20375     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
20376     // The narrowing should be profitable, the load/store operation should be
20377     // legal (or custom) and the store size should be equal to the NewVT width.
20378     while (NewBW < BitWidth &&
20379            (NewVT.getStoreSizeInBits() != NewBW ||
20380             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
20381             (!ReduceLoadOpStoreWidthForceNarrowingProfitable &&
20382              !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
20383       NewBW = NextPowerOf2(NewBW);
20384       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
20385     }
20386     if (NewBW >= BitWidth)
20387       return SDValue();
20388 
20389     // If we get this far, NewVT/NewBW reflect a power-of-2 sized type that is
20390     // large enough to cover all bits that should be modified. This type might
20391     // however be larger than really needed (such as i32 while we actually only
20392     // need to modify one byte). Now we need to find out how to align the memory
20393     // accesses to satisfy preferred alignments as well as avoid accessing
20394     // memory outside the store size of the original access.
20395 
20396     unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
20397 
20398     // Let ShAmt denote the number of bits to skip, counted from the least
20399     // significant bits of Imm, and let PtrOff denote how much the pointer
20400     // needs to be offset (in bytes) for the new access.
20401     unsigned ShAmt = 0;
20402     uint64_t PtrOff = 0;
20403     for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
20404       // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
20405       if (ShAmt > LSB)
20406         return SDValue();
20407       if (ShAmt + NewBW < MSB)
20408         continue;
20409 
20410       // Calculate PtrOff.
20411       unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
20412                                          ? VTStoreSize - NewBW - ShAmt
20413                                          : ShAmt;
20414       PtrOff = PtrAdjustmentInBits / 8;
20415 
20416       // Now check if narrow access is allowed and fast, considering alignments.
20417       unsigned IsFast = 0;
20418       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20419       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
20420                                  LD->getAddressSpace(), NewAlign,
20421                                  LD->getMemOperand()->getFlags(), &IsFast) &&
20422           IsFast)
20423         break;
20424     }
20425     // If the loop above did not find an acceptable ShAmt, we need to exit here.
20426     if (ShAmt + NewBW > VTStoreSize)
20427       return SDValue();
20428 
20429     APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
20430     if (Opc == ISD::AND)
20431       NewImm.flipAllBits();
20432     Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20433     SDValue NewPtr =
20434         DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
20435     SDValue NewLD =
20436         DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
20437                     LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
20438                     LD->getMemOperand()->getFlags(), LD->getAAInfo());
20439     SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
20440                                  DAG.getConstant(NewImm, SDLoc(Value), NewVT));
20441     SDValue NewST =
20442         DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
20443                      ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
20444 
20445     AddToWorklist(NewPtr.getNode());
20446     AddToWorklist(NewLD.getNode());
20447     AddToWorklist(NewVal.getNode());
20448     WorklistRemover DeadNodes(*this);
20449     DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
20450     ++OpsNarrowed;
20451     return NewST;
20452   }
20453 
20454   return SDValue();
20455 }
20456 
20457 /// For a given floating point load / store pair, if the load value isn't used
20458 /// by any other operations, then consider transforming the pair to integer
20459 /// load / store operations if the target deems the transformation profitable.
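      /// For example (a sketch): when the loaded f32 value feeds only the store,
      ///   (store (f32 load p), q) --> (store (i32 load p), q)
      /// which avoids a round trip through floating point registers on targets
      /// where the integer form is deemed profitable.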
20460 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
20461   StoreSDNode *ST  = cast<StoreSDNode>(N);
20462   SDValue Value = ST->getValue();
20463   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
20464       Value.hasOneUse()) {
20465     LoadSDNode *LD = cast<LoadSDNode>(Value);
20466     EVT VT = LD->getMemoryVT();
20467     if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
20468         LD->isNonTemporal() || ST->isNonTemporal() ||
20469         LD->getPointerInfo().getAddrSpace() != 0 ||
20470         ST->getPointerInfo().getAddrSpace() != 0)
20471       return SDValue();
20472 
20473     TypeSize VTSize = VT.getSizeInBits();
20474 
20475     // We don't know the size of scalable types at compile time so we cannot
20476     // create an integer of the equivalent size.
20477     if (VTSize.isScalable())
20478       return SDValue();
20479 
20480     unsigned FastLD = 0, FastST = 0;
20481     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
20482     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
20483         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
20484         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
20485         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
20486         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20487                                 *LD->getMemOperand(), &FastLD) ||
20488         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20489                                 *ST->getMemOperand(), &FastST) ||
20490         !FastLD || !FastST)
20491       return SDValue();
20492 
20493     SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
20494                                 LD->getBasePtr(), LD->getMemOperand());
20495 
20496     SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
20497                                  ST->getBasePtr(), ST->getMemOperand());
20498 
20499     AddToWorklist(NewLD.getNode());
20500     AddToWorklist(NewST.getNode());
20501     WorklistRemover DeadNodes(*this);
20502     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
20503     ++LdStFP2Int;
20504     return NewST;
20505   }
20506 
20507   return SDValue();
20508 }
20509 
20510 // This is a helper function for visitMUL to check the profitability
20511 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
20512 // MulNode is the original multiply, AddNode is (add x, c1),
20513 // and ConstNode is c2.
20514 //
20515 // If the (add x, c1) has multiple uses, we could increase
20516 // the number of adds if we make this transformation.
20517 // It would only be worth doing this if we can remove a
20518 // multiply in the process. Check for that here.
20519 // To illustrate:
20520 //     (A + c1) * c3
20521 //     (A + c2) * c3
20522 // We're checking for cases where we have common "c3 * A" expressions.
20523 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
20524                                               SDValue ConstNode) {
20525   APInt Val;
20526 
20527   // If the add only has one use, and the target thinks the folding is
20528   // profitable or does not lead to worse code, this would be OK to do.
20529   if (AddNode->hasOneUse() &&
20530       TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
20531     return true;
20532 
20533   // Walk all the users of the constant with which we're multiplying.
20534   for (SDNode *User : ConstNode->users()) {
20535     if (User == MulNode) // This use is the one we're on right now. Skip it.
20536       continue;
20537 
20538     if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
20539       SDNode *OtherOp;
20540       SDNode *MulVar = AddNode.getOperand(0).getNode();
20541 
20542       // OtherOp is what we're multiplying against the constant.
20543       if (User->getOperand(0) == ConstNode)
20544         OtherOp = User->getOperand(1).getNode();
20545       else
20546         OtherOp = User->getOperand(0).getNode();
20547 
20548       // Check to see if multiply is with the same operand of our "add".
20549       //
20550       //     ConstNode  = CONST
20551       //     User = ConstNode * A  <-- visiting User. OtherOp is A.
20552       //     ...
20553       //     AddNode  = (A + c1)  <-- MulVar is A.
20554       //         = AddNode * ConstNode   <-- current visiting instruction.
20555       //
20556       // If we make this transformation, we will have a common
20557       // multiply (ConstNode * A) that we can save.
20558       if (OtherOp == MulVar)
20559         return true;
20560 
20561       // Now check to see if a future expansion will give us a common
20562       // multiply.
20563       //
20564       //     ConstNode  = CONST
20565       //     AddNode    = (A + c1)
20566       //     ...   = AddNode * ConstNode <-- current visiting instruction.
20567       //     ...
20568       //     OtherOp = (A + c2)
20569       //     User    = OtherOp * ConstNode <-- visiting User.
20570       //
20571       // If we make this transformation, we will have a common
20572       // multiply (CONST * A) after we also do the same transformation
20573       // to the User instruction.
20574       if (OtherOp->getOpcode() == ISD::ADD &&
20575           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20576           OtherOp->getOperand(0).getNode() == MulVar)
20577         return true;
20578     }
20579   }
20580 
20581   // Didn't find a case where this would be profitable.
20582   return false;
20583 }
20584 
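      // Build a TokenFactor over the incoming chains of the stores being merged so
      // that the merged store depends on all of them; chains that are themselves
      // one of the merged stores, and duplicate chains, are filtered out via the
      // Visited set below.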
20585 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20586                                          unsigned NumStores) {
20587   SmallVector<SDValue, 8> Chains;
20588   SmallPtrSet<const SDNode *, 8> Visited;
20589   SDLoc StoreDL(StoreNodes[0].MemNode);
20590 
20591   for (unsigned i = 0; i < NumStores; ++i) {
20592     Visited.insert(StoreNodes[i].MemNode);
20593   }
20594 
20595   // don't include nodes that are children or repeated nodes.
20596   for (unsigned i = 0; i < NumStores; ++i) {
20597     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20598       Chains.push_back(StoreNodes[i].MemNode->getChain());
20599   }
20600 
20601   assert(!Chains.empty() && "Chain should have generated a chain");
20602   return DAG.getTokenFactor(StoreDL, Chains);
20603 }
20604 
20605 bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20606   const Value *UnderlyingObj = nullptr;
20607   for (const auto &MemOp : StoreNodes) {
20608     const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20609     // A pseudo value such as a stack frame object has its own frame index and
20610     // size; we should not use the first store's frame index for other frames.
20611     if (MMO->getPseudoValue())
20612       return false;
20613 
20614     if (!MMO->getValue())
20615       return false;
20616 
20617     const Value *Obj = getUnderlyingObject(MMO->getValue());
20618 
20619     if (UnderlyingObj && UnderlyingObj != Obj)
20620       return false;
20621 
20622     if (!UnderlyingObj)
20623       UnderlyingObj = Obj;
20624   }
20625 
20626   return true;
20627 }
20628 
20629 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20630     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20631     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20632   // Make sure we have something to merge.
20633   if (NumStores < 2)
20634     return false;
20635 
20636   assert((!UseTrunc || !UseVector) &&
20637          "This optimization cannot emit a vector truncating store");
20638 
20639   // The latest Node in the DAG.
20640   SDLoc DL(StoreNodes[0].MemNode);
20641 
20642   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20643   unsigned SizeInBits = NumStores * ElementSizeBits;
20644   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20645 
20646   std::optional<MachineMemOperand::Flags> Flags;
20647   AAMDNodes AAInfo;
20648   for (unsigned I = 0; I != NumStores; ++I) {
20649     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20650     if (!Flags) {
20651       Flags = St->getMemOperand()->getFlags();
20652       AAInfo = St->getAAInfo();
20653       continue;
20654     }
20655     // Skip merging if there's an inconsistent flag.
20656     if (Flags != St->getMemOperand()->getFlags())
20657       return false;
20658     // Concatenate AA metadata.
20659     AAInfo = AAInfo.concat(St->getAAInfo());
20660   }
20661 
20662   EVT StoreTy;
20663   if (UseVector) {
20664     unsigned Elts = NumStores * NumMemElts;
20665     // Get the type for the merged vector store.
20666     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20667   } else
20668     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20669 
20670   SDValue StoredVal;
20671   if (UseVector) {
20672     if (IsConstantSrc) {
20673       SmallVector<SDValue, 8> BuildVector;
20674       for (unsigned I = 0; I != NumStores; ++I) {
20675         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20676         SDValue Val = St->getValue();
20677         // If constant is of the wrong type, convert it now.  This comes up
20678         // when one of our stores was truncating.
20679         if (MemVT != Val.getValueType()) {
20680           Val = peekThroughBitcasts(Val);
20681           // Deal with constants of wrong size.
20682           if (ElementSizeBits != Val.getValueSizeInBits()) {
20683             auto *C = dyn_cast<ConstantSDNode>(Val);
20684             if (!C)
20685               // Not clear how to truncate FP values.
20686               // TODO: Handle truncation of build_vector constants
20687               return false;
20688 
20689             EVT IntMemVT =
20690                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20691             Val = DAG.getConstant(C->getAPIntValue()
20692                                       .zextOrTrunc(Val.getValueSizeInBits())
20693                                       .zextOrTrunc(ElementSizeBits),
20694                                   SDLoc(C), IntMemVT);
20695           }
20696           // Make sure the correctly sized value also has the correct type.
20697           Val = DAG.getBitcast(MemVT, Val);
20698         }
20699         BuildVector.push_back(Val);
20700       }
20701       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20702                                                : ISD::BUILD_VECTOR,
20703                               DL, StoreTy, BuildVector);
20704     } else {
20705       SmallVector<SDValue, 8> Ops;
20706       for (unsigned i = 0; i < NumStores; ++i) {
20707         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20708         SDValue Val = peekThroughBitcasts(St->getValue());
20709         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20710         // type MemVT. If the underlying value is not the correct
20711         // type, but it is an extraction of an appropriate vector we
20712         // can recast Val to be of the correct type. This may require
20713         // converting between EXTRACT_VECTOR_ELT and
20714         // EXTRACT_SUBVECTOR.
20715         if ((MemVT != Val.getValueType()) &&
20716             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20717              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20718           EVT MemVTScalarTy = MemVT.getScalarType();
20719           // We may need to add a bitcast here to get types to line up.
20720           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20721             Val = DAG.getBitcast(MemVT, Val);
20722           } else if (MemVT.isVector() &&
20723                      Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20724             Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20725           } else {
20726             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20727                                             : ISD::EXTRACT_VECTOR_ELT;
20728             SDValue Vec = Val.getOperand(0);
20729             SDValue Idx = Val.getOperand(1);
20730             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20731           }
20732         }
20733         Ops.push_back(Val);
20734       }
20735 
20736       // Build the extracted vector elements back into a vector.
20737       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20738                                                : ISD::BUILD_VECTOR,
20739                               DL, StoreTy, Ops);
20740     }
20741   } else {
20742     // We should always use a vector store when merging extracted vector
20743     // elements, so this path implies a store of constants.
20744     assert(IsConstantSrc && "Merged vector elements should use vector store");
20745 
20746     APInt StoreInt(SizeInBits, 0);
20747 
20748     // Construct a single integer constant which is made of the smaller
20749     // constant inputs.
20750     bool IsLE = DAG.getDataLayout().isLittleEndian();
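          // For example (a sketch): merging two i16 stores of 0x1111 (at the lower
          // address) and 0x2222 on a little-endian target visits the stores in
          // reverse order and packs StoreInt == 0x22221111, which places the
          // 0x1111 bytes at the lower address when the combined i32 is stored.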
20751     for (unsigned i = 0; i < NumStores; ++i) {
20752       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20753       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20754 
20755       SDValue Val = St->getValue();
20756       Val = peekThroughBitcasts(Val);
20757       StoreInt <<= ElementSizeBits;
20758       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20759         StoreInt |= C->getAPIntValue()
20760                         .zextOrTrunc(ElementSizeBits)
20761                         .zextOrTrunc(SizeInBits);
20762       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20763         StoreInt |= C->getValueAPF()
20764                         .bitcastToAPInt()
20765                         .zextOrTrunc(ElementSizeBits)
20766                         .zextOrTrunc(SizeInBits);
20767         // If fp truncation is necessary give up for now.
20768         if (MemVT.getSizeInBits() != ElementSizeBits)
20769           return false;
20770       } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20771                  ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20772         // Not yet handled
20773         return false;
20774       } else {
20775         llvm_unreachable("Invalid constant element type");
20776       }
20777     }
20778 
20779     // Create the new Load and Store operations.
20780     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20781   }
20782 
20783   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20784   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20785   bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20786 
20787   // Make sure we use a truncating store if that is necessary to be legal.
20788   // When generating the new widened store, if the first store's pointer info
20789   // cannot be reused, discard the pointer info except for the address space,
20790   // because the widened store can no longer be represented by the original
20791   // pointer info, which described the narrower memory object.
20792   SDValue NewStore;
20793   if (!UseTrunc) {
20794     NewStore = DAG.getStore(
20795         NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20796         CanReusePtrInfo
20797             ? FirstInChain->getPointerInfo()
20798             : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20799         FirstInChain->getAlign(), *Flags, AAInfo);
20800   } else { // Must be realized as a trunc store
20801     EVT LegalizedStoredValTy =
20802         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20803     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20804     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20805     SDValue ExtendedStoreVal =
20806         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20807                         LegalizedStoredValTy);
20808     NewStore = DAG.getTruncStore(
20809         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20810         CanReusePtrInfo
20811             ? FirstInChain->getPointerInfo()
20812             : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20813         StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20814         AAInfo);
20815   }
20816 
20817   // Replace all merged stores with the new store.
20818   for (unsigned i = 0; i < NumStores; ++i)
20819     CombineTo(StoreNodes[i].MemNode, NewStore);
20820 
20821   AddToWorklist(NewChain.getNode());
20822   return true;
20823 }
20824 
20825 SDNode *
20826 DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
20827                                      SmallVectorImpl<MemOpLink> &StoreNodes) {
20828   // This holds the base pointer, index, and the offset in bytes from the base
20829   // pointer. We must have a base and an offset. Do not handle stores to undef
20830   // base pointers.
20831   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20832   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20833     return nullptr;
20834 
20835   SDValue Val = peekThroughBitcasts(St->getValue());
20836   StoreSource StoreSrc = getStoreSource(Val);
20837   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20838 
20839   // Match on loadbaseptr if relevant.
20840   EVT MemVT = St->getMemoryVT();
20841   BaseIndexOffset LBasePtr;
20842   EVT LoadVT;
20843   if (StoreSrc == StoreSource::Load) {
20844     auto *Ld = cast<LoadSDNode>(Val);
20845     LBasePtr = BaseIndexOffset::match(Ld, DAG);
20846     LoadVT = Ld->getMemoryVT();
20847     // Load and store should be the same type.
20848     if (MemVT != LoadVT)
20849       return nullptr;
20850     // Loads must only have one use.
20851     if (!Ld->hasNUsesOfValue(1, 0))
20852       return nullptr;
20853     // The memory operands must not be volatile/indexed/atomic.
20854     // TODO: May be able to relax for unordered atomics (see D66309)
20855     if (!Ld->isSimple() || Ld->isIndexed())
20856       return nullptr;
20857   }
20858   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20859                             int64_t &Offset) -> bool {
20860     // The memory operands must not be volatile/indexed/atomic.
20861     // TODO: May be able to relax for unordered atomics (see D66309)
20862     if (!Other->isSimple() || Other->isIndexed())
20863       return false;
20864     // Don't mix temporal stores with non-temporal stores.
20865     if (St->isNonTemporal() != Other->isNonTemporal())
20866       return false;
20867     if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20868       return false;
20869     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20870     // Allow merging constants of different types as integers.
20871     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20872                                            : Other->getMemoryVT() != MemVT;
20873     switch (StoreSrc) {
20874     case StoreSource::Load: {
20875       if (NoTypeMatch)
20876         return false;
20877       // The Load's Base Ptr must also match.
20878       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20879       if (!OtherLd)
20880         return false;
20881       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20882       if (LoadVT != OtherLd->getMemoryVT())
20883         return false;
20884       // Loads must only have one use.
20885       if (!OtherLd->hasNUsesOfValue(1, 0))
20886         return false;
20887       // The memory operands must not be volatile/indexed/atomic.
20888       // TODO: May be able to relax for unordered atomics (see D66309)
20889       if (!OtherLd->isSimple() || OtherLd->isIndexed())
20890         return false;
20891       // Don't mix temporal loads with non-temporal loads.
20892       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20893         return false;
20894       if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20895                                                    *OtherLd))
20896         return false;
20897       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20898         return false;
20899       break;
20900     }
20901     case StoreSource::Constant:
20902       if (NoTypeMatch)
20903         return false;
20904       if (getStoreSource(OtherBC) != StoreSource::Constant)
20905         return false;
20906       break;
20907     case StoreSource::Extract:
20908       // Do not merge truncated stores here.
20909       if (Other->isTruncatingStore())
20910         return false;
20911       if (!MemVT.bitsEq(OtherBC.getValueType()))
20912         return false;
20913       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20914           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20915         return false;
20916       break;
20917     default:
20918       llvm_unreachable("Unhandled store source for merging");
20919     }
20920     Ptr = BaseIndexOffset::match(Other, DAG);
20921     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20922   };
20923 
20924   // We are looking for a root node that is an ancestor to all mergeable
20925   // stores. We search up through a load, to our root, and then down
20926   // through all children. For instance, we will find Store{1,2,3} if
20927   // St is Store1, Store2, or Store3 where the root is not a load,
20928   // which is always true for non-volatile ops. TODO: Expand
20929   // the search to find all valid candidates through multiple layers of loads.
20930   //
20931   // Root
20932   // |-------|-------|
20933   // Load    Load    Store3
20934   // |       |
20935   // Store1   Store2
20936   //
20937   // FIXME: We should be able to climb and
20938   // descend TokenFactors to find candidates as well.
20939 
20940   SDNode *RootNode = St->getChain().getNode();
20941   // Bail out if we already analyzed this root node and found nothing.
20942   if (ChainsWithoutMergeableStores.contains(RootNode))
20943     return nullptr;
20944 
20945   // Check if this pair of StoreNode and RootNode has already bailed out of
20946   // the dependence check more times than the limit allows.
20947   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20948                                         SDNode *RootNode) -> bool {
20949     auto RootCount = StoreRootCountMap.find(StoreNode);
20950     return RootCount != StoreRootCountMap.end() &&
20951            RootCount->second.first == RootNode &&
20952            RootCount->second.second > StoreMergeDependenceLimit;
20953   };
20954 
20955   auto TryToAddCandidate = [&](SDUse &Use) {
20956     // This must be a chain use.
20957     if (Use.getOperandNo() != 0)
20958       return;
20959     if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
20960       BaseIndexOffset Ptr;
20961       int64_t PtrDiff;
20962       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20963           !OverLimitInDependenceCheck(OtherStore, RootNode))
20964         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20965     }
20966   };
20967 
20968   unsigned NumNodesExplored = 0;
20969   const unsigned MaxSearchNodes = 1024;
20970   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20971     RootNode = Ldn->getChain().getNode();
20972     // Bail out if we already analyzed this root node and found nothing.
20973     if (ChainsWithoutMergeableStores.contains(RootNode))
20974       return nullptr;
20975     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20976          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20977       SDNode *User = I->getUser();
20978       if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
20979         for (SDUse &U2 : User->uses())
20980           TryToAddCandidate(U2);
20981       }
20982       // Check stores that depend on the root (e.g. Store 3 in the chart above).
20983       if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
20984         TryToAddCandidate(*I);
20985       }
20986     }
20987   } else {
20988     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20989          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20990       TryToAddCandidate(*I);
20991   }
20992 
20993   return RootNode;
20994 }
20995 
20996 // We need to check that merging these stores does not cause a loop in the
20997 // DAG. Any store candidate may depend on another candidate indirectly through
20998 // its operands. Check in parallel by searching up from operands of candidates.
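      // For example (a sketch): if candidate store B's value is (add (load q), 1)
      // and that load's chain passes through candidate store A, then a store
      // merged from A and B would be both a predecessor and a successor of the
      // load.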
20999 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
21000     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21001     SDNode *RootNode) {
21002   // FIXME: We should be able to truncate a full search of
21003   // predecessors by doing a BFS and keeping tabs on the originating
21004   // stores that worklist nodes come from, in a similar way to
21005   // TokenFactor simplification.
21006 
21007   SmallPtrSet<const SDNode *, 32> Visited;
21008   SmallVector<const SDNode *, 8> Worklist;
21009 
21010   // RootNode is a predecessor to all candidates, so we need not search
21011   // past it. Add RootNode (peeking through TokenFactors). Do not count
21012   // these towards the size check.
21013 
21014   Worklist.push_back(RootNode);
21015   while (!Worklist.empty()) {
21016     auto N = Worklist.pop_back_val();
21017     if (!Visited.insert(N).second)
21018       continue; // Already present in Visited.
21019     if (N->getOpcode() == ISD::TokenFactor) {
21020       for (SDValue Op : N->ops())
21021         Worklist.push_back(Op.getNode());
21022     }
21023   }
21024 
21025   // Don't count pruning nodes towards max.
21026   unsigned int Max = 1024 + Visited.size();
21027   // Search Ops of store candidates.
21028   for (unsigned i = 0; i < NumStores; ++i) {
21029     SDNode *N = StoreNodes[i].MemNode;
21030     // Of the 4 Store Operands:
21031     //   * Chain (Op 0) -> We have already considered these
21032     //                     in candidate selection, but only by following the
21033     //                     chain dependencies. We could still have a chain
21034     //                     dependency to a load, that has a non-chain dep to
21035     //                     another load, that depends on a store, etc. So it is
21036     //                     possible to have dependencies that consist of a mix
21037     //                     of chain and non-chain deps, and we need to include
21038   //                     chain operands in the analysis here.
21039   //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21040   //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21041   //                       but aren't necessarily from the same base node, so
21042     //                       cycles possible (e.g. via indexed store).
21043     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21044     //               non-indexed stores). Not constant on all targets (e.g. ARM)
21045     //               and so can participate in a cycle.
21046     for (const SDValue &Op : N->op_values())
21047       Worklist.push_back(Op.getNode());
21048   }
21049   // Search through DAG. We can stop early if we find a store node.
21050   for (unsigned i = 0; i < NumStores; ++i)
21051     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21052                                      Max)) {
21053       // If the search bails out, record the StoreNode and RootNode in the
21054       // StoreRootCountMap. If we have seen the pair more times than the limit,
21055       // we won't add the StoreNode into the StoreNodes set again.
21056       if (Visited.size() >= Max) {
21057         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21058         if (RootCount.first == RootNode)
21059           RootCount.second++;
21060         else
21061           RootCount = {RootNode, 1};
21062       }
21063       return false;
21064     }
21065   return true;
21066 }
21067 
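      // Return the number of consecutive stores at the front of StoreNodes (which
      // is sorted by offset), trimming leading candidates that overlap or are not
      // adjacent. For example (a sketch), with ElementSizeBytes == 4 and offsets
      // {0, 4, 8, 16}, the first three stores are consecutive, so 3 is returned
      // and the non-adjacent tail is left for a later iteration.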
21068 unsigned
21069 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21070                                   int64_t ElementSizeBytes) const {
21071   while (true) {
21072     // Find a store past the width of the first store.
21073     size_t StartIdx = 0;
21074     while ((StartIdx + 1 < StoreNodes.size()) &&
21075            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21076               StoreNodes[StartIdx + 1].OffsetFromBase)
21077       ++StartIdx;
21078 
21079     // Bail if we don't have enough candidates to merge.
21080     if (StartIdx + 1 >= StoreNodes.size())
21081       return 0;
21082 
21083     // Trim stores that overlapped with the first store.
21084     if (StartIdx)
21085       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21086 
21087     // Scan the memory operations on the chain and find the first
21088     // non-consecutive store memory address.
21089     unsigned NumConsecutiveStores = 1;
21090     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21091     // Check that the addresses are consecutive starting from the second
21092     // element in the list of stores.
21093     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21094       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21095       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21096         break;
21097       NumConsecutiveStores = i + 1;
21098     }
21099     if (NumConsecutiveStores > 1)
21100       return NumConsecutiveStores;
21101 
21102     // There are no consecutive stores at the start of the list.
21103     // Remove the first store and try again.
21104     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21105   }
21106 }
21107 
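      // For example (a sketch): four adjacent i8 stores of the constants 1, 2, 3
      // and 4 can become a single i32 store of 0x04030201 on a little-endian
      // target, provided the merged type is legal and the access is allowed and
      // fast.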
21108 bool DAGCombiner::tryStoreMergeOfConstants(
21109     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21110     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21111   LLVMContext &Context = *DAG.getContext();
21112   const DataLayout &DL = DAG.getDataLayout();
21113   int64_t ElementSizeBytes = MemVT.getStoreSize();
21114   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21115   bool MadeChange = false;
21116 
21117   // Store the constants into memory as one consecutive store.
21118   while (NumConsecutiveStores >= 2) {
21119     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21120     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21121     Align FirstStoreAlign = FirstInChain->getAlign();
21122     unsigned LastLegalType = 1;
21123     unsigned LastLegalVectorType = 1;
21124     bool LastIntegerTrunc = false;
21125     bool NonZero = false;
21126     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21127     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21128       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21129       SDValue StoredVal = ST->getValue();
21130       bool IsElementZero = false;
21131       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21132         IsElementZero = C->isZero();
21133       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21134         IsElementZero = C->getConstantFPValue()->isNullValue();
21135       else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21136         IsElementZero = true;
21137       if (IsElementZero) {
21138         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21139           FirstZeroAfterNonZero = i;
21140       }
21141       NonZero |= !IsElementZero;
21142 
21143       // Find a legal type for the constant store.
21144       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21145       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21146       unsigned IsFast = 0;
21147 
21148       // Break early when size is too large to be legal.
21149       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21150         break;
21151 
21152       if (TLI.isTypeLegal(StoreTy) &&
21153           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21154                                DAG.getMachineFunction()) &&
21155           TLI.allowsMemoryAccess(Context, DL, StoreTy,
21156                                  *FirstInChain->getMemOperand(), &IsFast) &&
21157           IsFast) {
21158         LastIntegerTrunc = false;
21159         LastLegalType = i + 1;
21160         // Or check whether a truncstore is legal.
21161       } else if (TLI.getTypeAction(Context, StoreTy) ==
21162                  TargetLowering::TypePromoteInteger) {
21163         EVT LegalizedStoredValTy =
21164             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
21165         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21166             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21167                                  DAG.getMachineFunction()) &&
21168             TLI.allowsMemoryAccess(Context, DL, StoreTy,
21169                                    *FirstInChain->getMemOperand(), &IsFast) &&
21170             IsFast) {
21171           LastIntegerTrunc = true;
21172           LastLegalType = i + 1;
21173         }
21174       }
21175 
21176       // We only use vectors if the target allows it and the function is not
21177       // marked with the noimplicitfloat attribute.
21178       if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
21179           AllowVectors) {
21180         // Find a legal type for the vector store.
21181         unsigned Elts = (i + 1) * NumMemElts;
21182         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21183         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
21184             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21185             TLI.allowsMemoryAccess(Context, DL, Ty,
21186                                    *FirstInChain->getMemOperand(), &IsFast) &&
21187             IsFast)
21188           LastLegalVectorType = i + 1;
21189       }
21190     }
21191 
21192     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
21193     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
21194     bool UseTrunc = LastIntegerTrunc && !UseVector;
21195 
21196     // Check if we found a legal integer type that creates a meaningful
21197     // merge.
21198     if (NumElem < 2) {
21199       // We know that candidate stores are in order and of correct
21200       // shape. While there is no mergeable sequence from the
21201     // beginning, one may start later in the sequence. The only
21202       // reason a merge of size N could have failed where another of
21203       // the same size would not have, is if the alignment has
21204       // improved or we've dropped a non-zero value. Drop as many
21205       // candidates as we can here.
21206       unsigned NumSkip = 1;
21207       while ((NumSkip < NumConsecutiveStores) &&
21208              (NumSkip < FirstZeroAfterNonZero) &&
21209              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21210         NumSkip++;
21211 
21212       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21213       NumConsecutiveStores -= NumSkip;
21214       continue;
21215     }
21216 
21217     // Check that we can merge these candidates without causing a cycle.
21218     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21219                                                   RootNode)) {
21220       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21221       NumConsecutiveStores -= NumElem;
21222       continue;
21223     }
21224 
21225     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
21226                                                   /*IsConstantSrc*/ true,
21227                                                   UseVector, UseTrunc);
21228 
21229     // Remove merged stores for next iteration.
21230     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21231     NumConsecutiveStores -= NumElem;
21232   }
21233   return MadeChange;
21234 }
21235 
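      // For example (a sketch): stores of two values extracted from the same
      // vector, e.g. (extract_subvector V, 0) and (extract_subvector V, 2), to
      // adjacent addresses can be merged into one wider vector store whose value
      // is rebuilt with CONCAT_VECTORS from the extracted pieces.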
21236 bool DAGCombiner::tryStoreMergeOfExtracts(
21237     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21238     EVT MemVT, SDNode *RootNode) {
21239   LLVMContext &Context = *DAG.getContext();
21240   const DataLayout &DL = DAG.getDataLayout();
21241   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21242   bool MadeChange = false;
21243 
21244   // Loop on Consecutive Stores on success.
21245   while (NumConsecutiveStores >= 2) {
21246     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21247     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21248     Align FirstStoreAlign = FirstInChain->getAlign();
21249     unsigned NumStoresToMerge = 1;
21250     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21251       // Find a legal type for the vector store.
21252       unsigned Elts = (i + 1) * NumMemElts;
21253       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21254       unsigned IsFast = 0;
21255 
21256       // Break early when size is too large to be legal.
21257       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
21258         break;
21259 
21260       if (TLI.isTypeLegal(Ty) &&
21261           TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21262           TLI.allowsMemoryAccess(Context, DL, Ty,
21263                                  *FirstInChain->getMemOperand(), &IsFast) &&
21264           IsFast)
21265         NumStoresToMerge = i + 1;
21266     }
21267 
21268     // Check if we found a legal integer type creating a meaningful
21269     // merge.
21270     if (NumStoresToMerge < 2) {
21271       // We know that candidate stores are in order and of correct
21272       // shape. While there is no mergeable sequence from the
21273     // beginning, one may start later in the sequence. The only
21274       // reason a merge of size N could have failed where another of
21275       // the same size would not have, is if the alignment has
21276       // improved. Drop as many candidates as we can here.
21277       unsigned NumSkip = 1;
21278       while ((NumSkip < NumConsecutiveStores) &&
21279              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21280         NumSkip++;
21281 
21282       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21283       NumConsecutiveStores -= NumSkip;
21284       continue;
21285     }
21286 
21287     // Check that we can merge these candidates without causing a cycle.
21288     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
21289                                                   RootNode)) {
21290       StoreNodes.erase(StoreNodes.begin(),
21291                        StoreNodes.begin() + NumStoresToMerge);
21292       NumConsecutiveStores -= NumStoresToMerge;
21293       continue;
21294     }
21295 
21296     MadeChange |= mergeStoresOfConstantsOrVecElts(
21297         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
21298         /*UseVector*/ true, /*UseTrunc*/ false);
21299 
21300     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
21301     NumConsecutiveStores -= NumStoresToMerge;
21302   }
21303   return MadeChange;
21304 }
21305 
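      // For example (a sketch of the memcpy-like case): four adjacent i8 loads
      // stored to four adjacent i8 locations can become one i32 load feeding one
      // i32 store, when the wider access is legal, allowed, and fast on the
      // target.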
21306 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
21307                                        unsigned NumConsecutiveStores, EVT MemVT,
21308                                        SDNode *RootNode, bool AllowVectors,
21309                                        bool IsNonTemporalStore,
21310                                        bool IsNonTemporalLoad) {
21311   LLVMContext &Context = *DAG.getContext();
21312   const DataLayout &DL = DAG.getDataLayout();
21313   int64_t ElementSizeBytes = MemVT.getStoreSize();
21314   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21315   bool MadeChange = false;
21316 
21317   // Look for load nodes which are used by the stored values.
21318   SmallVector<MemOpLink, 8> LoadNodes;
21319 
21320   // Find acceptable loads. Loads need to have the same chain (token factor),
21321   // must not be zext, volatile, or indexed, and they must be consecutive.
21322   BaseIndexOffset LdBasePtr;
21323 
21324   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21325     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21326     SDValue Val = peekThroughBitcasts(St->getValue());
21327     LoadSDNode *Ld = cast<LoadSDNode>(Val);
21328 
21329     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
21330     // If this is not the first pointer that we check, its base must match.
21331     int64_t LdOffset = 0;
21332     if (LdBasePtr.getBase().getNode()) {
21333       // The base ptr must be the same.
21334       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
21335         break;
21336     } else {
21337       // Record the first base pointer; all later ones must match it.
21338       LdBasePtr = LdPtr;
21339     }
21340 
21341     // We found a potential memory operand to merge.
21342     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
21343   }
21344 
21345   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
21346     Align RequiredAlignment;
21347     bool NeedRotate = false;
21348     if (LoadNodes.size() == 2) {
21349       // If we have load/store pair instructions and we only have two values,
21350       // don't bother merging.
21351       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
21352           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
21353         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
21354         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
21355         break;
21356       }
21357       // If the loads are reversed, see if we can rotate the halves into place.
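            // For example (a sketch, ElementSizeBytes == 4): storing (load p+4) at
            // q and (load p) at q+4 can instead load one i64 from p, rotate it by
            // 32 to swap the halves, and store a single i64 to q.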
21358       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
21359       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
21360       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
21361       if (Offset0 - Offset1 == ElementSizeBytes &&
21362           (hasOperation(ISD::ROTL, PairVT) ||
21363            hasOperation(ISD::ROTR, PairVT))) {
21364         std::swap(LoadNodes[0], LoadNodes[1]);
21365         NeedRotate = true;
21366       }
21367     }
21368     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21369     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21370     Align FirstStoreAlign = FirstInChain->getAlign();
21371     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
21372 
21373     // Scan the memory operations on the chain and find the first
21374     // non-consecutive load memory address. This variable holds the index in
21375     // the store node array.
21376 
21377     unsigned LastConsecutiveLoad = 1;
21378 
21379     // These variables refer to a size, not an index in the array.
21380     unsigned LastLegalVectorType = 1;
21381     unsigned LastLegalIntegerType = 1;
21382     bool isDereferenceable = true;
21383     bool DoIntegerTruncate = false;
21384     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
21385     SDValue LoadChain = FirstLoad->getChain();
21386     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
21387       // All loads must share the same chain.
21388       if (LoadNodes[i].MemNode->getChain() != LoadChain)
21389         break;
21390 
21391       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
21392       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21393         break;
21394       LastConsecutiveLoad = i;
21395 
21396       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
21397         isDereferenceable = false;
21398 
21399       // Find a legal type for the vector store.
21400       unsigned Elts = (i + 1) * NumMemElts;
21401       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21402 
21403       // Break early when size is too large to be legal.
21404       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21405         break;
21406 
21407       unsigned IsFastSt = 0;
21408       unsigned IsFastLd = 0;
21409       // Don't try vector types if we need a rotate. We may still fail the
21410       // legality checks for the integer type, but we can't handle the rotate
21411       // case with vectors.
21412       // FIXME: We could use a shuffle in place of the rotate.
21413       if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
21414           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21415                                DAG.getMachineFunction()) &&
21416           TLI.allowsMemoryAccess(Context, DL, StoreTy,
21417                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
21418           IsFastSt &&
21419           TLI.allowsMemoryAccess(Context, DL, StoreTy,
21420                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
21421           IsFastLd) {
21422         LastLegalVectorType = i + 1;
21423       }
21424 
21425       // Find a legal type for the integer store.
21426       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21427       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21428       if (TLI.isTypeLegal(StoreTy) &&
21429           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21430                                DAG.getMachineFunction()) &&
21431           TLI.allowsMemoryAccess(Context, DL, StoreTy,
21432                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
21433           IsFastSt &&
21434           TLI.allowsMemoryAccess(Context, DL, StoreTy,
21435                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
21436           IsFastLd) {
21437         LastLegalIntegerType = i + 1;
21438         DoIntegerTruncate = false;
21439         // Or check whether a truncstore and extload is legal.
21440       } else if (TLI.getTypeAction(Context, StoreTy) ==
21441                  TargetLowering::TypePromoteInteger) {
21442         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
21443         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21444             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21445                                  DAG.getMachineFunction()) &&
21446             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
21447             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
21448             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
21449             TLI.allowsMemoryAccess(Context, DL, StoreTy,
21450                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
21451             IsFastSt &&
21452             TLI.allowsMemoryAccess(Context, DL, StoreTy,
21453                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
21454             IsFastLd) {
21455           LastLegalIntegerType = i + 1;
21456           DoIntegerTruncate = true;
21457         }
21458       }
21459     }
21460 
21461     // Only use vector types if the vector type is larger than the integer
21462     // type. If they are the same, use integers.
21463     bool UseVectorTy =
21464         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
21465     unsigned LastLegalType =
21466         std::max(LastLegalVectorType, LastLegalIntegerType);
21467 
21468     // We add +1 here because the LastXXX variables are indices (positions)
21469     // while NumElem is a count (an array size).
21470     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
21471     NumElem = std::min(LastLegalType, NumElem);
21472     Align FirstLoadAlign = FirstLoad->getAlign();
21473 
21474     if (NumElem < 2) {
21475       // We know that candidate stores are in order and of correct
21476       // shape. While there is no mergeable sequence from the
21477     // beginning, one may start later in the sequence. The only
21478     // reason a merge of size N could have failed where another of
21479     // the same size would not have is if the alignment of either
21480     // the load or the store has improved. Drop as many candidates as we
21481       // can here.
21482       unsigned NumSkip = 1;
21483       while ((NumSkip < LoadNodes.size()) &&
21484              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21485              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21486         NumSkip++;
21487       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21488       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
21489       NumConsecutiveStores -= NumSkip;
21490       continue;
21491     }
21492 
21493     // Check that we can merge these candidates without causing a cycle.
21494     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21495                                                   RootNode)) {
21496       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21497       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21498       NumConsecutiveStores -= NumElem;
21499       continue;
21500     }
21501 
21502     // Find if it is better to use vectors or integers to load and store
21503     // to memory.
21504     EVT JointMemOpVT;
21505     if (UseVectorTy) {
21506       // Find a legal type for the vector store.
21507       unsigned Elts = NumElem * NumMemElts;
21508       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21509     } else {
21510       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
21511       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
21512     }
21513 
21514     SDLoc LoadDL(LoadNodes[0].MemNode);
21515     SDLoc StoreDL(StoreNodes[0].MemNode);
21516 
21517     // The merged loads are required to have the same incoming chain, so
21518     // using the first's chain is acceptable.
21519 
21520     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
21521     bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21522     AddToWorklist(NewStoreChain.getNode());
21523 
21524     MachineMemOperand::Flags LdMMOFlags =
21525         isDereferenceable ? MachineMemOperand::MODereferenceable
21526                           : MachineMemOperand::MONone;
21527     if (IsNonTemporalLoad)
21528       LdMMOFlags |= MachineMemOperand::MONonTemporal;
21529 
21530     LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
21531 
21532     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
21533                                               ? MachineMemOperand::MONonTemporal
21534                                               : MachineMemOperand::MONone;
21535 
21536     StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
21537 
21538     SDValue NewLoad, NewStore;
21539     if (UseVectorTy || !DoIntegerTruncate) {
21540       NewLoad = DAG.getLoad(
21541           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21542           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21543       SDValue StoreOp = NewLoad;
21544       if (NeedRotate) {
21545         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
21546         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
21547                "Unexpected type for rotate-able load pair");
21548         SDValue RotAmt =
21549             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
21550         // Target can convert to the identical ROTR if it does not have ROTL.
21551         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
21552       }
21553       NewStore = DAG.getStore(
21554           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21555           CanReusePtrInfo ? FirstInChain->getPointerInfo()
21556                           : MachinePointerInfo(FirstStoreAS),
21557           FirstStoreAlign, StMMOFlags);
21558     } else { // This must be the truncstore/extload case
21559       EVT ExtendedTy =
21560           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
21561       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
21562                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
21563                                FirstLoad->getPointerInfo(), JointMemOpVT,
21564                                FirstLoadAlign, LdMMOFlags);
21565       NewStore = DAG.getTruncStore(
21566           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21567           CanReusePtrInfo ? FirstInChain->getPointerInfo()
21568                           : MachinePointerInfo(FirstStoreAS),
21569           JointMemOpVT, FirstInChain->getAlign(),
21570           FirstInChain->getMemOperand()->getFlags());
21571     }
21572 
21573     // Transfer chain users from old loads to the new load.
21574     for (unsigned i = 0; i < NumElem; ++i) {
21575       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21576       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21577                                     SDValue(NewLoad.getNode(), 1));
21578     }
21579 
21580     // Replace all stores with the new store. Recursively remove corresponding
21581     // values if they are no longer used.
21582     for (unsigned i = 0; i < NumElem; ++i) {
21583       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21584       CombineTo(StoreNodes[i].MemNode, NewStore);
21585       if (Val->use_empty())
21586         recursivelyDeleteUnusedNodes(Val.getNode());
21587     }
21588 
21589     MadeChange = true;
21590     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21591     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21592     NumConsecutiveStores -= NumElem;
21593   }
21594   return MadeChange;
21595 }
21596 
21597 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21598   if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21599     return false;
21600 
21601   // TODO: Extend this function to merge stores of scalable vectors.
21602   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21603   // store since we know <vscale x 16 x i8> is exactly twice as large as
21604   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21605   EVT MemVT = St->getMemoryVT();
21606   if (MemVT.isScalableVT())
21607     return false;
21608   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21609     return false;
21610 
21611   // This function cannot currently deal with non-byte-sized memory sizes.
21612   int64_t ElementSizeBytes = MemVT.getStoreSize();
21613   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21614     return false;
21615 
21616   // Do not bother looking at stored values that are not constants, loads, or
21617   // extracted vector elements.
21618   SDValue StoredVal = peekThroughBitcasts(St->getValue());
21619   const StoreSource StoreSrc = getStoreSource(StoredVal);
21620   if (StoreSrc == StoreSource::Unknown)
21621     return false;
21622 
21623   SmallVector<MemOpLink, 8> StoreNodes;
21624   // Find potential store merge candidates by searching through the chain sub-DAG.
21625   SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
21626 
21627   // Check if there is anything to merge.
21628   if (StoreNodes.size() < 2)
21629     return false;
21630 
21631   // Sort the memory operands according to their distance from the
21632   // base pointer.
21633   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21634     return LHS.OffsetFromBase < RHS.OffsetFromBase;
21635   });
21636 
21637   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21638       Attribute::NoImplicitFloat);
21639   bool IsNonTemporalStore = St->isNonTemporal();
21640   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21641                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
21642 
21643   // Store merging attempts to merge the lowest stores first. This
21644   // generally works out well; when it succeeds, the remaining stores are
21645   // checked after the first collection of stores is merged. However, in
21646   // the case that a non-mergeable store is found first, e.g., {p[-2],
21647   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21648   // mergeable cases. To prevent this, we prune such stores from the
21649   // front of StoreNodes here.
21650   bool MadeChange = false;
21651   while (StoreNodes.size() > 1) {
21652     unsigned NumConsecutiveStores =
21653         getConsecutiveStores(StoreNodes, ElementSizeBytes);
21654     // There are no more stores in the list to examine.
21655     if (NumConsecutiveStores == 0)
21656       return MadeChange;
21657 
21658     // We have at least 2 consecutive stores. Try to merge them.
21659     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21660     switch (StoreSrc) {
21661     case StoreSource::Constant:
21662       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21663                                              MemVT, RootNode, AllowVectors);
21664       break;
21665 
21666     case StoreSource::Extract:
21667       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21668                                             MemVT, RootNode);
21669       break;
21670 
21671     case StoreSource::Load:
21672       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21673                                          MemVT, RootNode, AllowVectors,
21674                                          IsNonTemporalStore, IsNonTemporalLoad);
21675       break;
21676 
21677     default:
21678       llvm_unreachable("Unhandled store source type");
21679     }
21680   }
21681 
21682   // Remember if we failed to optimize, to save compile time.
21683   if (!MadeChange)
21684     ChainsWithoutMergeableStores.insert(RootNode);
21685 
21686   return MadeChange;
21687 }
21688 
21689 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21690   SDLoc SL(ST);
21691   SDValue ReplStore;
21692 
21693   // Replace the chain to avoid dependency.
21694   if (ST->isTruncatingStore()) {
21695     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21696                                   ST->getBasePtr(), ST->getMemoryVT(),
21697                                   ST->getMemOperand());
21698   } else {
21699     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21700                              ST->getMemOperand());
21701   }
21702 
21703   // Create token to keep both nodes around.
21704   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21705                               MVT::Other, ST->getChain(), ReplStore);
21706 
21707   // Make sure the new and old chains are cleaned up.
21708   AddToWorklist(Token.getNode());
21709 
21710   // Don't add users to work list.
21711   return CombineTo(ST, Token, false);
21712 }
21713 
21714 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21715   SDValue Value = ST->getValue();
21716   if (Value.getOpcode() == ISD::TargetConstantFP)
21717     return SDValue();
21718 
21719   if (!ISD::isNormalStore(ST))
21720     return SDValue();
21721 
21722   SDLoc DL(ST);
21723 
21724   SDValue Chain = ST->getChain();
21725   SDValue Ptr = ST->getBasePtr();
21726 
21727   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21728 
21729   // NOTE: If the original store is volatile, this transform must not increase
21730   // the number of stores.  For example, on x86-32 an f64 can be stored in one
21731   // processor operation but an i64 (which is not legal) requires two.  So the
21732   // transform should not be done in this case.
21733 
21734   SDValue Tmp;
21735   switch (CFP->getSimpleValueType(0).SimpleTy) {
21736   default:
21737     llvm_unreachable("Unknown FP type");
21738   case MVT::f16:    // We don't do this for these yet.
21739   case MVT::bf16:
21740   case MVT::f80:
21741   case MVT::f128:
21742   case MVT::ppcf128:
21743     return SDValue();
21744   case MVT::f32:
21745     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21746         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21747       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21748                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21749                             MVT::i32);
21750       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21751     }
21752 
21753     return SDValue();
21754   case MVT::f64:
21755     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21756          ST->isSimple()) ||
21757         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21758       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21759                             getZExtValue(), SDLoc(CFP), MVT::i64);
21760       return DAG.getStore(Chain, DL, Tmp,
21761                           Ptr, ST->getMemOperand());
21762     }
21763 
21764     if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21765         !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21766       // Many FP stores are not made apparent until after legalize, e.g. for
21767       // argument passing.  Since this is so common, custom legalize the
21768       // 64-bit integer store into two 32-bit stores.
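      // For example (little-endian): 'store f64 1.0' (bits 0x3FF0000000000000)
      // becomes (store i32 0x0, p) and (store i32 0x3FF00000, p+4).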
21769       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21770       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21771       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21772       if (DAG.getDataLayout().isBigEndian())
21773         std::swap(Lo, Hi);
21774 
21775       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21776       AAMDNodes AAInfo = ST->getAAInfo();
21777 
21778       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21779                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
21780       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21781       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21782                                  ST->getPointerInfo().getWithOffset(4),
21783                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
21784       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21785                          St0, St1);
21786     }
21787 
21788     return SDValue();
21789   }
21790 }
21791 
21792 // (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21793 //
21794 // If a store of a load with an element inserted into it has no other
21795 // uses in between the chain, then we can consider the vector store
21796 // dead and replace it with just the single scalar element store.
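//
// Illustrative example (v4i32 vector, constant index 2): the scalar lane
// lands at byte offset 2 * 4 = 8, so
//   (store (insert_vector_elt (load p), x, 2), p) --> (store i32 x, p + 8)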
21797 SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21798   SDLoc DL(ST);
21799   SDValue Value = ST->getValue();
21800   SDValue Ptr = ST->getBasePtr();
21801   SDValue Chain = ST->getChain();
21802   if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21803     return SDValue();
21804 
21805   SDValue Elt = Value.getOperand(1);
21806   SDValue Idx = Value.getOperand(2);
21807 
21808   // If the element isn't byte sized or is implicitly truncated then we can't
21809   // compute an offset.
21810   EVT EltVT = Elt.getValueType();
21811   if (!EltVT.isByteSized() ||
21812       EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21813     return SDValue();
21814 
21815   auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21816   if (!Ld || Ld->getBasePtr() != Ptr ||
21817       ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21818       !ISD::isNormalStore(ST) ||
21819       Ld->getAddressSpace() != ST->getAddressSpace() ||
21820       !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21821     return SDValue();
21822 
21823   unsigned IsFast = 0;
21824   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21825                               Elt.getValueType(), ST->getAddressSpace(),
21826                               ST->getAlign(), ST->getMemOperand()->getFlags(),
21827                               &IsFast) ||
21828       !IsFast)
21829     return SDValue();
21830 
21831   MachinePointerInfo PointerInfo(ST->getAddressSpace());
21832 
21833   // If the offset is a known constant then try to recover the pointer
21834   // info.
21835   SDValue NewPtr;
21836   if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21837     unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21838     NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21839     PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21840   } else {
21841     NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21842   }
21843 
21844   return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21845                       ST->getMemOperand()->getFlags());
21846 }
21847 
21848 SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21849   AtomicSDNode *ST = cast<AtomicSDNode>(N);
21850   SDValue Val = ST->getVal();
21851   EVT VT = Val.getValueType();
21852   EVT MemVT = ST->getMemoryVT();
21853 
21854   if (MemVT.bitsLT(VT)) { // Is truncating store
21855     APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21856                                                    MemVT.getScalarSizeInBits());
21857     // See if we can simplify the operation with SimplifyDemandedBits, which
21858     // only works if the value has a single use.
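    // For example, when an i32 value is stored into i16 memory, only the low
    // 16 bits are demanded, so a value such as (or x, 0xFFFF0000) simplifies
    // to x.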
21859     if (SimplifyDemandedBits(Val, TruncDemandedBits))
21860       return SDValue(N, 0);
21861   }
21862 
21863   return SDValue();
21864 }
21865 
21866 SDValue DAGCombiner::visitSTORE(SDNode *N) {
21867   StoreSDNode *ST  = cast<StoreSDNode>(N);
21868   SDValue Chain = ST->getChain();
21869   SDValue Value = ST->getValue();
21870   SDValue Ptr   = ST->getBasePtr();
21871 
21872   // If this is a store of a bit convert, store the input value if the
21873   // resultant store does not need a higher alignment than the original.
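  // For example: (store (i64 (bitcast v2f32:X)), p) --> (store v2f32:X, p)
  // when the target reports the v2f32 store as legal and beneficial.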
21874   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21875       ST->isUnindexed()) {
21876     EVT SVT = Value.getOperand(0).getValueType();
21877     // If the store is volatile, we only want to change the store type if the
21878     // resulting store is legal. Otherwise we might increase the number of
21879     // memory accesses. We don't care if the original type was legal or not
21880     // as we assume software couldn't rely on the number of accesses of an
21881     // illegal type.
21882     // TODO: May be able to relax for unordered atomics (see D66309)
21883     if (((!LegalOperations && ST->isSimple()) ||
21884          TLI.isOperationLegal(ISD::STORE, SVT)) &&
21885         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21886                                      DAG, *ST->getMemOperand())) {
21887       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21888                           ST->getMemOperand());
21889     }
21890   }
21891 
21892   // Turn 'store undef, Ptr' -> nothing.
21893   if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
21894     return Chain;
21895 
21896   // Try to infer better alignment information than the store already has.
21897   if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21898       !ST->isAtomic()) {
21899     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21900       if (*Alignment > ST->getAlign() &&
21901           isAligned(*Alignment, ST->getSrcValueOffset())) {
21902         SDValue NewStore =
21903             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21904                               ST->getMemoryVT(), *Alignment,
21905                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
21906         // NewStore will always be N as we are only refining the alignment
21907         assert(NewStore.getNode() == N);
21908         (void)NewStore;
21909       }
21910     }
21911   }
21912 
21913   // Try transforming a pair of floating-point load / store ops to integer
21914   // load / store ops.
21915   if (SDValue NewST = TransformFPLoadStorePair(N))
21916     return NewST;
21917 
21918   // Try transforming several stores into STORE (BSWAP).
21919   if (SDValue Store = mergeTruncStores(ST))
21920     return Store;
21921 
21922   if (ST->isUnindexed()) {
21923     // Walk up chain skipping non-aliasing memory nodes, on this store and any
21924     // adjacent stores.
21925     if (findBetterNeighborChains(ST)) {
21926       // replaceStoreChain uses CombineTo, which handles all of the worklist
21927       // manipulation. Return the original node so that nothing else is done.
21928       return SDValue(ST, 0);
21929     }
21930     Chain = ST->getChain();
21931   }
21932 
21933   // FIXME: is there such a thing as a truncating indexed store?
21934   if (ST->isTruncatingStore() && ST->isUnindexed() &&
21935       Value.getValueType().isInteger() &&
21936       (!isa<ConstantSDNode>(Value) ||
21937        !cast<ConstantSDNode>(Value)->isOpaque())) {
21938     // Convert a truncating store of an extension into a standard store.
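    // For example:
    //   (truncstore (zext i16:X to i32), p, i16) --> (store i16:X, p)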
21939     if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21940          Value.getOpcode() == ISD::SIGN_EXTEND ||
21941          Value.getOpcode() == ISD::ANY_EXTEND) &&
21942         Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21943         TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21944       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21945                           ST->getMemOperand());
21946 
21947     APInt TruncDemandedBits =
21948         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21949                              ST->getMemoryVT().getScalarSizeInBits());
21950 
21951     // See if we can simplify the operation with SimplifyDemandedBits, which
21952     // only works if the value has a single use.
21953     AddToWorklist(Value.getNode());
21954     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21955       // Re-visit the store if anything changed and the store hasn't been merged
21956       // with another node (N is deleted). SimplifyDemandedBits will add Value's
21957       // node back to the worklist if necessary, but we also need to re-visit
21958       // the store node itself.
21959       if (N->getOpcode() != ISD::DELETED_NODE)
21960         AddToWorklist(N);
21961       return SDValue(N, 0);
21962     }
21963 
21964     // Otherwise, see if we can simplify the input to this truncstore with
21965     // knowledge that only the low bits are being used.  For example:
21966     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
21967     if (SDValue Shorter =
21968             TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21969       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21970                                ST->getMemOperand());
21971 
21972     // If we're storing a truncated constant, see if we can simplify it.
21973     // TODO: Move this to targetShrinkDemandedConstant?
21974     if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21975       if (!Cst->isOpaque()) {
21976         const APInt &CValue = Cst->getAPIntValue();
21977         APInt NewVal = CValue & TruncDemandedBits;
21978         if (NewVal != CValue) {
21979           SDValue Shorter =
21980               DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21981           return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21982                                    ST->getMemoryVT(), ST->getMemOperand());
21983         }
21984       }
21985   }
21986 
21987   // If this is a load followed by a store to the same location, then the store
21988   // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21989   // TODO: Add big-endian truncate support with test coverage.
21990   // TODO: Can relax for unordered atomics (see D66309)
21991   SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21992                          ? peekThroughTruncates(Value)
21993                          : Value;
21994   if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21995     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21996         ST->isUnindexed() && ST->isSimple() &&
21997         Ld->getAddressSpace() == ST->getAddressSpace() &&
21998         // There can't be any side effects between the load and store, such as
21999         // a call or store.
22000         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22001       // The store is dead, remove it.
22002       return Chain;
22003     }
22004   }
22005 
22006   // Try scalarizing vector stores of loads where we only change one element
22007   if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22008     return NewST;
22009 
22010   // TODO: Can relax for unordered atomics (see D66309)
22011   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22012     if (ST->isUnindexed() && ST->isSimple() &&
22013         ST1->isUnindexed() && ST1->isSimple()) {
22014       if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22015           ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22016           ST->getAddressSpace() == ST1->getAddressSpace()) {
22017         // If this is a store followed by a store with the same value to the
22018         // same location, then the store is dead/noop.
22019         return Chain;
22020       }
22021 
22022       if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22023           !ST1->getBasePtr().isUndef() &&
22024           ST->getAddressSpace() == ST1->getAddressSpace()) {
22025         // If one of the two stores is of a scalable vector type and the
22026         // other is a bigger store of a fixed type, we cannot allow removal
22027         // of the scalable store, because its final size is not known until
22028         // runtime.
22029         if (ST->getMemoryVT().isScalableVector() ||
22030             ST1->getMemoryVT().isScalableVector()) {
22031           if (ST1->getBasePtr() == Ptr &&
22032               TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22033                                   ST->getMemoryVT().getStoreSize())) {
22034             CombineTo(ST1, ST1->getChain());
22035             return SDValue(N, 0);
22036           }
22037         } else {
22038           const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22039           const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22040           // If the preceding store writes to a subset of the current store's
22041           // location and no other node is chained to that store, we can
22042           // effectively drop the preceding store. Do not remove stores to
22043           // undef as they may be used as data sinks.
22044           if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22045                               ChainBase,
22046                               ST1->getMemoryVT().getFixedSizeInBits())) {
22047             CombineTo(ST1, ST1->getChain());
22048             return SDValue(N, 0);
22049           }
22050         }
22051       }
22052     }
22053   }
22054 
22055   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22056   // truncating store.  We can do this even if this is already a truncstore.
22057   if ((Value.getOpcode() == ISD::FP_ROUND ||
22058        Value.getOpcode() == ISD::TRUNCATE) &&
22059       Value->hasOneUse() && ST->isUnindexed() &&
22060       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22061                                ST->getMemoryVT(), LegalOperations)) {
22062     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22063                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
22064   }
22065 
22066   // Always perform this optimization before types are legal. If the target
22067   // prefers, also try this after legalization to catch stores that were created
22068   // by intrinsics or other nodes.
22069   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22070     while (true) {
22071       // There can be multiple store sequences on the same chain.
22072       // Keep trying to merge store sequences until we are unable to do so
22073       // or until we merge the last store on the chain.
22074       bool Changed = mergeConsecutiveStores(ST);
22075       if (!Changed) break;
22076       // Return N, as the merge only uses CombineTo and no worklist cleanup
22077       // is necessary.
22078       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22079         return SDValue(N, 0);
22080     }
22081   }
22082 
22083   // Try transforming N to an indexed store.
22084   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
22085     return SDValue(N, 0);
22086 
22087   // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
22088   //
22089   // Make sure to do this only after attempting to merge stores in order to
22090   //  avoid changing the types of some subset of stores due to visit order,
22091   //  preventing their merging.
22092   if (isa<ConstantFPSDNode>(ST->getValue())) {
22093     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
22094       return NewSt;
22095   }
22096 
22097   if (SDValue NewSt = splitMergedValStore(ST))
22098     return NewSt;
22099 
22100   return ReduceLoadOpStoreWidth(N);
22101 }
22102 
22103 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
22104   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
22105   if (!LifetimeEnd->hasOffset())
22106     return SDValue();
22107 
22108   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
22109                                         LifetimeEnd->getOffset(), false);
22110 
22111   // We walk up the chains to find stores.
22112   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22113   while (!Chains.empty()) {
22114     SDValue Chain = Chains.pop_back_val();
22115     if (!Chain.hasOneUse())
22116       continue;
22117     switch (Chain.getOpcode()) {
22118     case ISD::TokenFactor:
22119       for (unsigned Nops = Chain.getNumOperands(); Nops;)
22120         Chains.push_back(Chain.getOperand(--Nops));
22121       break;
22122     case ISD::LIFETIME_START:
22123     case ISD::LIFETIME_END:
22124       // We can forward past any lifetime start/end that can be proven not to
22125       // alias the node.
22126       if (!mayAlias(Chain.getNode(), N))
22127         Chains.push_back(Chain.getOperand(0));
22128       break;
22129     case ISD::STORE: {
22130       StoreSDNode *ST = cast<StoreSDNode>(Chain); // Opcode is ISD::STORE.
22131       // TODO: Can relax for unordered atomics (see D66309)
22132       if (!ST->isSimple() || ST->isIndexed())
22133         continue;
22134       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22135       // The bounds of a scalable store are not known until runtime, so this
22136       // store cannot be elided.
22137       if (StoreSize.isScalable())
22138         continue;
22139       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
22140       // If we store purely within object bounds just before its lifetime ends,
22141       // we can remove the store.
22142       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
22143                                    StoreSize.getFixedValue() * 8)) {
22144         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
22145                    dbgs() << "\nwithin LIFETIME_END of : ";
22146                    LifetimeEndBase.dump(); dbgs() << "\n");
22147         CombineTo(ST, ST->getChain());
22148         return SDValue(N, 0);
22149       }
22150     }
22151     }
22152   }
22153   return SDValue();
22154 }
22155 
22156 /// For the store instruction sequence below, the F and I values
22157 /// are bundled together as an i64 value before being stored into memory.
22158 /// Sometimes it is more efficient to generate separate stores for F and I,
22159 /// which can remove the bitwise instructions or sink them to colder places.
22160 ///
22161 ///   (store (or (zext (bitcast F to i32) to i64),
22162 ///              (shl (zext I to i64), 32)), addr)  -->
22163 ///   (store F, addr) and (store I, addr+4)
22164 ///
22165 /// Similarly, splitting other merged stores can also be beneficial, like:
22166 /// For pair of {i32, i32}, i64 store --> two i32 stores.
22167 /// For pair of {i32, i16}, i64 store --> two i32 stores.
22168 /// For pair of {i16, i16}, i32 store --> two i16 stores.
22169 /// For pair of {i16, i8},  i32 store --> two i16 stores.
22170 /// For pair of {i8, i8},   i16 store --> two i8 stores.
22171 ///
22172 /// We allow each target to determine specifically which kind of splitting is
22173 /// supported.
22174 ///
22175 /// The store patterns are commonly seen in the simple code snippet below
22176 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
22177 ///   void goo(const std::pair<int, float> &);
22178 ///   hoo() {
22179 ///     ...
22180 ///     goo(std::make_pair(tmp, ftmp));
22181 ///     ...
22182 ///   }
22183 ///
22184 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
22185   if (OptLevel == CodeGenOptLevel::None)
22186     return SDValue();
22187 
22188   // Can't change the number of memory accesses for a volatile store or break
22189   // atomicity for an atomic one.
22190   if (!ST->isSimple())
22191     return SDValue();
22192 
22193   SDValue Val = ST->getValue();
22194   SDLoc DL(ST);
22195 
22196   // Match OR operand.
22197   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
22198     return SDValue();
22199 
22200   // Match the SHL operand and get the Lo and Hi parts of Val.
22201   SDValue Op1 = Val.getOperand(0);
22202   SDValue Op2 = Val.getOperand(1);
22203   SDValue Lo, Hi;
22204   if (Op1.getOpcode() != ISD::SHL) {
22205     std::swap(Op1, Op2);
22206     if (Op1.getOpcode() != ISD::SHL)
22207       return SDValue();
22208   }
22209   Lo = Op2;
22210   Hi = Op1.getOperand(0);
22211   if (!Op1.hasOneUse())
22212     return SDValue();
22213 
22214   // Match shift amount to HalfValBitSize.
22215   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
22216   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
22217   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
22218     return SDValue();
22219 
22220   // Lo and Hi must each be zero-extended from an integer type no wider
22221   // than HalfValBitSize.
22222   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
22223       !Lo.getOperand(0).getValueType().isScalarInteger() ||
22224       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
22225       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
22226       !Hi.getOperand(0).getValueType().isScalarInteger() ||
22227       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
22228     return SDValue();
22229 
22230   // Use the EVTs of the low and high parts before any bitcast as the
22231   // inputs to the target query.
22232   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
22233                   ? Lo.getOperand(0).getValueType()
22234                   : Lo.getValueType();
22235   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
22236                    ? Hi.getOperand(0).getValueType()
22237                    : Hi.getValueType();
22238   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
22239     return SDValue();
22240 
22241   // Start to split store.
22242   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22243   AAMDNodes AAInfo = ST->getAAInfo();
22244 
22245   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
22246   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
22247   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
22248   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
22249 
22250   SDValue Chain = ST->getChain();
22251   SDValue Ptr = ST->getBasePtr();
22252   // Lower value store.
22253   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22254                              ST->getOriginalAlign(), MMOFlags, AAInfo);
22255   Ptr =
22256       DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
22257   // Higher value store.
22258   SDValue St1 = DAG.getStore(
22259       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
22260       ST->getOriginalAlign(), MMOFlags, AAInfo);
22261   return St1;
22262 }
22263 
22264 // Merge an insertion into an existing shuffle:
22265 // (insert_vector_elt (vector_shuffle X, Y, Mask),
22266 //                    (extract_vector_elt X, N), InsIndex)
22267 //   --> (vector_shuffle X, Y, NewMask)
22268 //  and variations where shuffle operands may be CONCAT_VECTORS.
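//
// Illustrative example (4-element vectors): inserting (extract_vector_elt X, 2)
// at index 3 of (vector_shuffle X, Y, <0,1,4,5>) only needs a mask update:
//   (vector_shuffle X, Y, <0,1,4,5>) --> (vector_shuffle X, Y, <0,1,4,2>)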
22269 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
22270                                 SmallVectorImpl<int> &NewMask, SDValue Elt,
22271                                 unsigned InsIndex) {
22272   if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22273       !isa<ConstantSDNode>(Elt.getOperand(1)))
22274     return false;
22275 
22276   // Vec's operand 0 is using indices from 0 to N-1 and
22277   // operand 1 from N to 2N - 1, where N is the number of
22278   // elements in the vectors.
22279   SDValue InsertVal0 = Elt.getOperand(0);
22280   int ElementOffset = -1;
22281 
22282   // We explore the inputs of the shuffle in order to see if we find the
22283   // source of the extract_vector_elt. If so, we can use it to modify the
22284   // shuffle rather than perform an insert_vector_elt.
22285   SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
22286   ArgWorkList.emplace_back(Mask.size(), Y);
22287   ArgWorkList.emplace_back(0, X);
22288 
22289   while (!ArgWorkList.empty()) {
22290     int ArgOffset;
22291     SDValue ArgVal;
22292     std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
22293 
22294     if (ArgVal == InsertVal0) {
22295       ElementOffset = ArgOffset;
22296       break;
22297     }
22298 
22299     // Peek through concat_vector.
22300     if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
22301       int CurrentArgOffset =
22302           ArgOffset + ArgVal.getValueType().getVectorNumElements();
22303       int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
22304       for (SDValue Op : reverse(ArgVal->ops())) {
22305         CurrentArgOffset -= Step;
22306         ArgWorkList.emplace_back(CurrentArgOffset, Op);
22307       }
22308 
22309       // Make sure we went through all the elements and did not screw up index
22310       // computation.
22311       assert(CurrentArgOffset == ArgOffset);
22312     }
22313   }
22314 
22315   // If we failed to find a match, see if we can replace an UNDEF shuffle
22316   // operand.
22317   if (ElementOffset == -1) {
22318     if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
22319       return false;
22320     ElementOffset = Mask.size();
22321     Y = InsertVal0;
22322   }
22323 
22324   NewMask.assign(Mask.begin(), Mask.end());
22325   NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
22326   assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
22327          "NewMask[InsIndex] is out of bounds");
22328   return true;
22329 }
22330 
22331 // Merge an insertion into an existing shuffle:
22332 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
22333 // InsIndex)
22334 //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
22335 //   CONCAT_VECTORS.
22336 SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
22337   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22338          "Expected insert_vector_elt");
22339   SDValue InsertVal = N->getOperand(1);
22340   SDValue Vec = N->getOperand(0);
22341 
22342   auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
22343   if (!SVN || !Vec.hasOneUse())
22344     return SDValue();
22345 
22346   ArrayRef<int> Mask = SVN->getMask();
22347   SDValue X = Vec.getOperand(0);
22348   SDValue Y = Vec.getOperand(1);
22349 
22350   SmallVector<int, 16> NewMask(Mask);
22351   if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
22352     SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
22353         Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
22354     if (LegalShuffle)
22355       return LegalShuffle;
22356   }
22357 
22358   return SDValue();
22359 }
22360 
22361 // Convert a disguised subvector insertion into a shuffle:
22362 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
22363 // bitcast(shuffle (bitcast V), (extended X), Mask)
22364 // Note: We do not use an insert_subvector node because that requires a
22365 // legal subvector type.
22366 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
22367   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22368          "Expected insert_vector_elt");
22369   SDValue InsertVal = N->getOperand(1);
22370 
22371   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
22372       !InsertVal.getOperand(0).getValueType().isVector())
22373     return SDValue();
22374 
22375   SDValue SubVec = InsertVal.getOperand(0);
22376   SDValue DestVec = N->getOperand(0);
22377   EVT SubVecVT = SubVec.getValueType();
22378   EVT VT = DestVec.getValueType();
22379   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
22380   // If the source only has a single vector element, the cost of creating a
22381   // vector from it is likely to exceed the cost of an insert_vector_elt.
22382   if (NumSrcElts == 1)
22383     return SDValue();
22384   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
22385   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
22386 
22387   // Step 1: Create a shuffle mask that implements this insert operation. The
22388   // vector that we are inserting into will be operand 0 of the shuffle, so
22389   // those elements are just 'i'. The inserted subvector is in the first
22390   // positions of operand 1 of the shuffle. Example:
22391   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
22392   SmallVector<int, 16> Mask(NumMaskVals);
22393   for (unsigned i = 0; i != NumMaskVals; ++i) {
22394     if (i / NumSrcElts == InsIndex)
22395       Mask[i] = (i % NumSrcElts) + NumMaskVals;
22396     else
22397       Mask[i] = i;
22398   }
22399 
22400   // Bail out if the target cannot handle the shuffle we want to create.
22401   EVT SubVecEltVT = SubVecVT.getVectorElementType();
22402   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
22403   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
22404     return SDValue();
22405 
22406   // Step 2: Create a wide vector from the inserted source vector by appending
22407   // undefined elements. This is the same size as our destination vector.
22408   SDLoc DL(N);
22409   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
22410   ConcatOps[0] = SubVec;
22411   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
22412 
22413   // Step 3: Shuffle in the padded subvector.
22414   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
22415   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
22416   AddToWorklist(PaddedSubV.getNode());
22417   AddToWorklist(DestVecBC.getNode());
22418   AddToWorklist(Shuf.getNode());
22419   return DAG.getBitcast(VT, Shuf);
22420 }
22421 
22422 // Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
22423 // possible and the new load will be fast. We use more loads but fewer
22424 // shuffles and inserts.
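//
// Illustrative example (v4i32, InsIndex == 0, %p the vector load's pointer,
// 4-byte elements):
//   insert(shuffle(load <4 x i32> %p, <u,0,1,2>), load i32 at %p - 4, 0)
//     --> load <4 x i32> at %p - 4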
22425 SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
22426   EVT VT = N->getValueType(0);
22427 
22428   // InsIndex is expected to be the first or last lane.
22429   if (!VT.isFixedLengthVector() ||
22430       (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
22431     return SDValue();
22432 
22433   // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
22434   // depending on the InsIndex.
22435   auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
22436   SDValue Scalar = N->getOperand(1);
22437   if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
22438         return InsIndex == P.index() || P.value() < 0 ||
22439                (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
22440                (InsIndex == VT.getVectorNumElements() - 1 &&
22441                 P.value() == (int)P.index() + 1);
22442       }))
22443     return SDValue();
22444 
22445   // We optionally skip over an extend so long as both loads are extended in the
22446   // same way from the same type.
22447   unsigned Extend = 0;
22448   if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
22449       Scalar.getOpcode() == ISD::SIGN_EXTEND ||
22450       Scalar.getOpcode() == ISD::ANY_EXTEND) {
22451     Extend = Scalar.getOpcode();
22452     Scalar = Scalar.getOperand(0);
22453   }
22454 
22455   auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
22456   if (!ScalarLoad)
22457     return SDValue();
22458 
22459   SDValue Vec = Shuffle->getOperand(0);
22460   if (Extend) {
22461     if (Vec.getOpcode() != Extend)
22462       return SDValue();
22463     Vec = Vec.getOperand(0);
22464   }
22465   auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
22466   if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
22467     return SDValue();
22468 
22469   int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
22470   if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
22471       !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22472       ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22473       ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
22474     return SDValue();
22475 
22476   // Check that the offset between the pointers produces a single contiguous
22477   // load.
22478   if (InsIndex == 0) {
22479     if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
22480                                             -1))
22481       return SDValue();
22482   } else {
22483     if (!DAG.areNonVolatileConsecutiveLoads(
22484             VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
22485       return SDValue();
22486   }
22487 
22488   // And that the new unaligned load will be fast.
22489   unsigned IsFast = 0;
22490   Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22491   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22492                               Vec.getValueType(), VecLoad->getAddressSpace(),
22493                               NewAlign, VecLoad->getMemOperand()->getFlags(),
22494                               &IsFast) ||
22495       !IsFast)
22496     return SDValue();
22497 
22498   // Calculate the new Ptr and create the new load.
22499   SDLoc DL(N);
22500   SDValue Ptr = ScalarLoad->getBasePtr();
22501   if (InsIndex != 0)
22502     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22503                       DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
22504   MachinePointerInfo PtrInfo =
22505       InsIndex == 0 ? ScalarLoad->getPointerInfo()
22506                     : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22507 
22508   SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22509                              ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22510   DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
22511   DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
22512   return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
22513 }
22514 
22515 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
22516   SDValue InVec = N->getOperand(0);
22517   SDValue InVal = N->getOperand(1);
22518   SDValue EltNo = N->getOperand(2);
22519   SDLoc DL(N);
22520 
22521   EVT VT = InVec.getValueType();
22522   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
22523 
22524   // Insertion into an out-of-bounds element is undefined.
22525   if (IndexC && VT.isFixedLengthVector() &&
22526       IndexC->getZExtValue() >= VT.getVectorNumElements())
22527     return DAG.getUNDEF(VT);
22528 
22529   // Remove redundant insertions:
22530   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22531   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22532       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
22533     return InVec;
22534 
22535   if (!IndexC) {
22536     // If this is variable insert to undef vector, it might be better to splat:
22537     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22538     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
22539       return DAG.getSplat(VT, DL, InVal);
22540     return SDValue();
22541   }
22542 
22543   if (VT.isScalableVector())
22544     return SDValue();
22545 
22546   unsigned NumElts = VT.getVectorNumElements();
22547 
22548   // We must know which element is being inserted for folds below here.
22549   unsigned Elt = IndexC->getZExtValue();
22550 
22551   // Handle <1 x ???> vector insertion special cases.
22552   if (NumElts == 1) {
22553     // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22554     if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22555         InVal.getOperand(0).getValueType() == VT &&
22556         isNullConstant(InVal.getOperand(1)))
22557       return InVal.getOperand(0);
22558   }
22559 
22560   // Canonicalize insert_vector_elt dag nodes.
22561   // Example:
22562   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
22563   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22564   //
22565   // Do this only if the child insert_vector node has one use; also
22566   // do this only if indices are both constants and Idx1 < Idx0.
22567   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
22568       && isa<ConstantSDNode>(InVec.getOperand(2))) {
22569     unsigned OtherElt = InVec.getConstantOperandVal(2);
22570     if (Elt < OtherElt) {
22571       // Swap nodes.
22572       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22573                                   InVec.getOperand(0), InVal, EltNo);
22574       AddToWorklist(NewOp.getNode());
22575       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22576                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22577     }
22578   }
22579 
22580   if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22581     return Shuf;
22582 
22583   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22584     return Shuf;
22585 
22586   if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22587     return Shuf;
22588 
22589   // Attempt to convert an insert_vector_elt chain into a legal build_vector.
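  // For example (v4i32):
  //   (insert_vector_elt (insert_vector_elt undef, a, 0), b, 1)
  //     --> (build_vector a, b, undef, undef)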
22590   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22591     // Single-element vector - we don't need to recurse.
22592     if (NumElts == 1)
22593       return DAG.getBuildVector(VT, DL, {InVal});
22594 
22595     // If we haven't already collected the element, insert into the op list.
22596     EVT MaxEltVT = InVal.getValueType();
22597     auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22598                                 unsigned Idx) {
22599       if (!Ops[Idx]) {
22600         Ops[Idx] = Elt;
22601         if (VT.isInteger()) {
22602           EVT EltVT = Elt.getValueType();
22603           MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22604         }
22605       }
22606     };
22607 
22608     // Ensure all the operands are the same value type, fill any missing
22609     // operands with UNDEF and create the BUILD_VECTOR.
22610     auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22611       assert(Ops.size() == NumElts && "Unexpected vector size");
22612       for (SDValue &Op : Ops) {
22613         if (Op)
22614           Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22615         else
22616           Op = DAG.getUNDEF(MaxEltVT);
22617       }
22618       return DAG.getBuildVector(VT, DL, Ops);
22619     };
22620 
22621     SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22622     Ops[Elt] = InVal;
22623 
22624     // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22625     for (SDValue CurVec = InVec; CurVec;) {
22626       // UNDEF - build new BUILD_VECTOR from already inserted operands.
22627       if (CurVec.isUndef())
22628         return CanonicalizeBuildVector(Ops);
22629 
22630       // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22631       if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22632         for (unsigned I = 0; I != NumElts; ++I)
22633           AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22634         return CanonicalizeBuildVector(Ops);
22635       }
22636 
22637       // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22638       if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22639         AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22640         return CanonicalizeBuildVector(Ops);
22641       }
22642 
22643       // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22644       if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22645         if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22646           if (CurIdx->getAPIntValue().ult(NumElts)) {
22647             unsigned Idx = CurIdx->getZExtValue();
22648             AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22649 
22650             // Found entire BUILD_VECTOR.
22651             if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22652               return CanonicalizeBuildVector(Ops);
22653 
22654             CurVec = CurVec->getOperand(0);
22655             continue;
22656           }
22657 
22658       // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22659       // update the shuffle mask (and second operand if we started with a unary
22660       // shuffle) and create a new legal shuffle.
22661       if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22662         auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22663         SDValue LHS = SVN->getOperand(0);
22664         SDValue RHS = SVN->getOperand(1);
22665         SmallVector<int, 16> Mask(SVN->getMask());
22666         bool Merged = true;
22667         for (auto I : enumerate(Ops)) {
22668           SDValue &Op = I.value();
22669           if (Op) {
22670             SmallVector<int, 16> NewMask;
22671             if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22672               Merged = false;
22673               break;
22674             }
22675             Mask = std::move(NewMask);
22676           }
22677         }
22678         if (Merged)
22679           if (SDValue NewShuffle =
22680                   TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22681             return NewShuffle;
22682       }
22683 
22684       // If all insertions are zero value, try to convert to AND mask.
22685       // TODO: Do this for -1 with OR mask?
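      // For example, inserting zeros into lanes 1 and 3 of v4i32:X yields
      //   (and X, (build_vector -1, 0, -1, 0))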
22686       if (!LegalOperations && llvm::isNullConstant(InVal) &&
22687           all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22688           count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22689         SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22690         SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22691         SmallVector<SDValue, 8> Mask(NumElts);
22692         for (unsigned I = 0; I != NumElts; ++I)
22693           Mask[I] = Ops[I] ? Zero : AllOnes;
22694         return DAG.getNode(ISD::AND, DL, VT, CurVec,
22695                            DAG.getBuildVector(VT, DL, Mask));
22696       }
22697 
22698       // Failed to find a match in the chain - bail.
22699       break;
22700     }
22701 
22702     // See if we can fill in the missing constant elements as zeros.
22703     // TODO: Should we do this for any constant?
22704     APInt DemandedZeroElts = APInt::getZero(NumElts);
22705     for (unsigned I = 0; I != NumElts; ++I)
22706       if (!Ops[I])
22707         DemandedZeroElts.setBit(I);
22708 
22709     if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22710       SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22711                                     : DAG.getConstantFP(0, DL, MaxEltVT);
22712       for (unsigned I = 0; I != NumElts; ++I)
22713         if (!Ops[I])
22714           Ops[I] = Zero;
22715 
22716       return CanonicalizeBuildVector(Ops);
22717     }
22718   }
22719 
22720   return SDValue();
22721 }
22722 
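// Replace an extract of a vector load with a narrow scalar load when the
// target allows it. Illustrative example (v4i32, constant index 2):
//   (i32 (extract_vector_elt (load <4 x i32> %p), 2)) --> (load i32 %p + 8)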
22723 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22724                                                   SDValue EltNo,
22725                                                   LoadSDNode *OriginalLoad) {
22726   assert(OriginalLoad->isSimple());
22727 
22728   EVT ResultVT = EVE->getValueType(0);
22729   EVT VecEltVT = InVecVT.getVectorElementType();
22730 
22731   // If the vector element size is not a multiple of a byte then we are unable
22732   // to correctly compute an address to load only the extracted element as a
22733   // scalar.
22734   if (!VecEltVT.isByteSized())
22735     return SDValue();
22736 
22737   ISD::LoadExtType ExtTy =
22738       ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
22739   if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22740       !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22741     return SDValue();
22742 
22743   Align Alignment = OriginalLoad->getAlign();
22744   MachinePointerInfo MPI;
22745   SDLoc DL(EVE);
22746   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22747     int Elt = ConstEltNo->getZExtValue();
22748     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22749     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22750     Alignment = commonAlignment(Alignment, PtrOff);
22751   } else {
22752     // Discard the pointer info except the address space because the memory
22753     // operand can't represent this new access since the offset is variable.
22754     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22755     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22756   }
22757 
22758   unsigned IsFast = 0;
22759   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22760                               OriginalLoad->getAddressSpace(), Alignment,
22761                               OriginalLoad->getMemOperand()->getFlags(),
22762                               &IsFast) ||
22763       !IsFast)
22764     return SDValue();
22765 
22766   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22767                                                InVecVT, EltNo);
22768 
22769   // We are replacing a vector load with a scalar load. The new load must have
22770   // identical memory op ordering to the original.
22771   SDValue Load;
22772   if (ResultVT.bitsGT(VecEltVT)) {
22773     // If the result type of vextract is wider than the load, then issue an
22774     // extending load instead.
22775     ISD::LoadExtType ExtType =
22776         TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22777                                                               : ISD::EXTLOAD;
22778     Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22779                           NewPtr, MPI, VecEltVT, Alignment,
22780                           OriginalLoad->getMemOperand()->getFlags(),
22781                           OriginalLoad->getAAInfo());
22782     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22783   } else {
22784     // The result type is narrower or the same width as the vector element
22785     Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22786                        Alignment, OriginalLoad->getMemOperand()->getFlags(),
22787                        OriginalLoad->getAAInfo());
22788     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22789     if (ResultVT.bitsLT(VecEltVT))
22790       Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22791     else
22792       Load = DAG.getBitcast(ResultVT, Load);
22793   }
22794   ++OpsNarrowed;
22795   return Load;
22796 }
22797 
22798 /// Transform a vector binary operation into a scalar binary operation by moving
22799 /// the math/logic after an extract element of a vector.
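/// For example, with a constant build_vector operand:
///   (i32 (extract_vector_elt (add X, (build_vector 1, 2, 3, 4)), 1))
///     --> (add (i32 (extract_vector_elt X, 1)), 2)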
22800 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
22801                                        const SDLoc &DL, bool LegalTypes) {
22802   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22803   SDValue Vec = ExtElt->getOperand(0);
22804   SDValue Index = ExtElt->getOperand(1);
22805   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22806   unsigned Opc = Vec.getOpcode();
22807   if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
22808       Vec->getNumValues() != 1)
22809     return SDValue();
22810 
22811   // Targets may want to avoid this to prevent an expensive register transfer.
22812   if (!TLI.shouldScalarizeBinop(Vec))
22813     return SDValue();
22814 
22815   EVT ResVT = ExtElt->getValueType(0);
22816   if (Opc == ISD::SETCC &&
22817       (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
22818     return SDValue();
22819 
22820   // Extracting an element of a vector constant is constant-folded, so this
22821   // transform is just replacing a vector op with a scalar op while moving the
22822   // extract.
22823   SDValue Op0 = Vec.getOperand(0);
22824   SDValue Op1 = Vec.getOperand(1);
22825   APInt SplatVal;
22826   if (!isAnyConstantBuildVector(Op0, true) &&
22827       !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
22828       !isAnyConstantBuildVector(Op1, true) &&
22829       !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
22830     return SDValue();
22831 
22832   // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
22833   // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
22834   if (Opc == ISD::SETCC) {
22835     EVT OpVT = Op0.getValueType().getVectorElementType();
22836     Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
22837     Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
22838     SDValue NewVal = DAG.getSetCC(
22839         DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
22840     // We may need to sign- or zero-extend the result to match the same
22841     // behaviour as the vector version of SETCC.
22842     unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
22843     if (ResVT != MVT::i1 &&
22844         VecBoolContents != TargetLowering::UndefinedBooleanContent &&
22845         VecBoolContents != TLI.getBooleanContents(ResVT)) {
22846       if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
22847         NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
22848                              DAG.getValueType(MVT::i1));
22849       else
22850         NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
22851     }
22852     return NewVal;
22853   }
22854   Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
22855   Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
22856   return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
22857 }
22858 
22859 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence
22860 // extract, recursively analyse all of its users and try to model them as
22861 // bit sequence extractions as well. If all of them agree on the new, narrower
22862 // element type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's
22863 // of that new element type, do so now.
22864 // This is mainly useful to recover from legalization that scalarized the
22865 // vector as wide elements; this combine rebuilds it with narrower elements.
22866 //
22867 // Some more nodes could be modelled if that helps cover interesting patterns.
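//
// A hypothetical little-endian example of the kind of DAG this recovers,
// assuming the narrow values feed BUILD_VECTORs (per the profitability check):
//   t0: i32 = extract_vector_elt v2i32:V, 0
//   t1: i32 = srl t0, 16
//   t2: i16 = truncate t1    (bits [16,32) of V)
//   t3: i16 = truncate t0    (bits [0,16) of V)
// can be rebuilt, after bitcasting V to v4i16, as:
//   t2': i16 = extract_vector_elt v4i16:(bitcast V), 1
//   t3': i16 = extract_vector_elt v4i16:(bitcast V), 0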
22868 bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22869     SDNode *N) {
22870   // We perform this optimization post type-legalization because
22871   // the type-legalizer often scalarizes integer-promoted vectors.
22872   // Performing this optimization earlier may cause legalization cycles.
22873   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22874     return false;
22875 
22876   // TODO: Add support for big-endian.
22877   if (DAG.getDataLayout().isBigEndian())
22878     return false;
22879 
22880   SDValue VecOp = N->getOperand(0);
22881   EVT VecVT = VecOp.getValueType();
22882   assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22883 
22884   // We must start with a constant extraction index.
22885   auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22886   if (!IndexC)
22887     return false;
22888 
22889   assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22890          "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
22891 
22892   // TODO: deal with the case of implicit anyext of the extraction.
22893   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22894   EVT ScalarVT = N->getValueType(0);
22895   if (VecVT.getScalarType() != ScalarVT)
22896     return false;
22897 
22898   // TODO: deal with the cases other than everything being integer-typed.
22899   if (!ScalarVT.isScalarInteger())
22900     return false;
22901 
22902   struct Entry {
22903     SDNode *Producer;
22904 
22905     // Which bits of VecOp does it contain?
22906     unsigned BitPos;
22907     int NumBits;
22908     // NOTE: the actual width of \p Producer may be wider than NumBits!
22909 
22910     Entry(Entry &&) = default;
22911     Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22912         : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22913 
22914     Entry() = delete;
22915     Entry(const Entry &) = delete;
22916     Entry &operator=(const Entry &) = delete;
22917     Entry &operator=(Entry &&) = delete;
22918   };
22919   SmallVector<Entry, 32> Worklist;
22920   SmallVector<Entry, 32> Leafs;
22921 
22922   // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22923   Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22924                         /*NumBits=*/VecEltBitWidth);
22925 
22926   while (!Worklist.empty()) {
22927     Entry E = Worklist.pop_back_val();
22928     // Bail if the node's bit range is empty or lies outside of VecOp.
22929     if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22930           E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22931       return false; // Let the other combines clean this up first.
22932     // Did we fail to model any of the users of the Producer?
22933     bool ProducerIsLeaf = false;
22934     // Look at each user of this Producer.
22935     for (SDNode *User : E.Producer->users()) {
22936       switch (User->getOpcode()) {
22937       // TODO: support ISD::BITCAST
22938       // TODO: support ISD::ANY_EXTEND
22939       // TODO: support ISD::ZERO_EXTEND
22940       // TODO: support ISD::SIGN_EXTEND
22941       case ISD::TRUNCATE:
22942         // Truncation simply means we keep the position but extract fewer bits.
22943         Worklist.emplace_back(User, E.BitPos,
22944                               /*NumBits=*/User->getValueSizeInBits(0));
22945         break;
22946       // TODO: support ISD::SRA
22947       // TODO: support ISD::SHL
22948       case ISD::SRL:
22949         // We should be shifting the Producer by a constant amount.
22950         if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22951             User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22952           // Logical right-shift means that we start extraction later,
22953           // but stop it at the same position we did previously.
22954           unsigned ShAmt = ShAmtC->getZExtValue();
22955           Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22956           break;
22957         }
22958         [[fallthrough]];
22959       default:
22960         // We cannot model this user of the Producer, which means the
22961         // current Producer will remain an ISD::EXTRACT_VECTOR_ELT.
22962         ProducerIsLeaf = true;
22963         // Profitability check: all users that we cannot model
22964         //                      must be ISD::BUILD_VECTOR's.
22965         if (User->getOpcode() != ISD::BUILD_VECTOR)
22966           return false;
22967         break;
22968       }
22969     }
22970     if (ProducerIsLeaf)
22971       Leafs.emplace_back(std::move(E));
22972   }
22973 
22974   unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22975 
22976   // If we are still at the same element granularity, give up.
22977   if (NewVecEltBitWidth == VecEltBitWidth)
22978     return false;
22979 
22980   // The vector width must be a multiple of the new element width.
22981   if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22982     return false;
22983 
22984   // All leafs must agree on the new element width.
22985   // All leafs must not expect any "padding" bits on top of that width.
22986   // All leafs must start extraction at a multiple of that width.
22987   if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22988         return (unsigned)E.NumBits == NewVecEltBitWidth &&
22989                E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22990                E.BitPos % NewVecEltBitWidth == 0;
22991       }))
22992     return false;
22993 
22994   EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22995   EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22996                                   VecVT.getSizeInBits() / NewVecEltBitWidth);
22997 
22998   if (LegalTypes &&
22999       !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
23000     return false;
23001 
23002   if (LegalOperations &&
23003       !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
23004         TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
23005     return false;
23006 
23007   SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
23008   for (const Entry &E : Leafs) {
23009     SDLoc DL(E.Producer);
23010     unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
23011     assert(NewIndex < NewVecVT.getVectorNumElements() &&
23012            "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23013     SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23014                             DAG.getVectorIdxConstant(NewIndex, DL));
23015     CombineTo(E.Producer, V);
23016   }
23017 
23018   return true;
23019 }
23020 
23021 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23022   SDValue VecOp = N->getOperand(0);
23023   SDValue Index = N->getOperand(1);
23024   EVT ScalarVT = N->getValueType(0);
23025   EVT VecVT = VecOp.getValueType();
23026   if (VecOp.isUndef())
23027     return DAG.getUNDEF(ScalarVT);
23028 
23029   // extract_vector_elt (insert_vector_elt vec, val, idx), idx -> val
23030   //
23031   // This only really matters if the index is non-constant since other combines
23032   // on the constant elements already work.
23033   SDLoc DL(N);
23034   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23035       Index == VecOp.getOperand(2)) {
23036     SDValue Elt = VecOp.getOperand(1);
23037     AddUsersToWorklist(VecOp.getNode());
23038     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23039   }
23040 
23041   // (vextract (scalar_to_vector val), 0) -> val
23042   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23043     // Only the 0'th element of SCALAR_TO_VECTOR is defined.
23044     if (DAG.isKnownNeverZero(Index))
23045       return DAG.getUNDEF(ScalarVT);
23046 
23047     // Check if the result type doesn't match the inserted element type.
23048     // The inserted element and extracted element may have mismatched bitwidth.
23049     // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23050     SDValue InOp = VecOp.getOperand(0);
23051     if (InOp.getValueType() != ScalarVT) {
23052       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23053       if (InOp.getValueType().bitsGT(ScalarVT))
23054         return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23055       return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23056     }
23057     return InOp;
23058   }
23059 
23060   // extract_vector_elt of out-of-bounds element -> UNDEF
23061   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23062   if (IndexC && VecVT.isFixedLengthVector() &&
23063       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23064     return DAG.getUNDEF(ScalarVT);
23065 
23066   // extract_vector_elt (build_vector x, y), 1 -> y
23067   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23068        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23069       TLI.isTypeLegal(VecVT)) {
23070     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23071             VecVT.isFixedLengthVector()) &&
23072            "BUILD_VECTOR used for scalable vectors");
23073     unsigned IndexVal =
23074         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23075     SDValue Elt = VecOp.getOperand(IndexVal);
23076     EVT InEltVT = Elt.getValueType();
23077 
23078     if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23079         isNullConstant(Elt)) {
23080       // Sometimes build_vector's scalar input types do not match result type.
23081       if (ScalarVT == InEltVT)
23082         return Elt;
23083 
23084       // TODO: It may be useful to truncate (if it is free) when the
23085       // build_vector implicitly converts.
23086     }
23087   }
23088 
23089   if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23090     return BO;
23091 
23092   if (VecVT.isScalableVector())
23093     return SDValue();
23094 
23095   // All the code from this point onwards assumes fixed width vectors, but it's
23096   // possible that some of the combinations could be made to work for scalable
23097   // vectors too.
23098   unsigned NumElts = VecVT.getVectorNumElements();
23099   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23100 
23101   // See if the extracted element is constant, in which case fold it if it's
23102   // a legal fp immediate.
23103   if (IndexC && ScalarVT.isFloatingPoint()) {
23104     APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23105     KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23106     if (KnownElt.isConstant()) {
23107       APFloat CstFP =
23108           APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23109       if (TLI.isFPImmLegal(CstFP, ScalarVT))
23110         return DAG.getConstantFP(CstFP, DL, ScalarVT);
23111     }
23112   }
23113 
23114   // TODO: These transforms should not require the 'hasOneUse' restriction, but
23115   // there are regressions on multiple targets without it. We can end up with a
23116   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23117   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23118       VecOp.hasOneUse()) {
23119     // The vector index of the LSBs of the source depends on the endianness.
23120     bool IsLE = DAG.getDataLayout().isLittleEndian();
23121     unsigned ExtractIndex = IndexC->getZExtValue();
23122     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23123     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
23124     SDValue BCSrc = VecOp.getOperand(0);
23125     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
23126       return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
23127 
23128     // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
23129     if (LegalTypes && BCSrc.getValueType().isInteger() &&
23130         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23131         BCSrc.getScalarValueSizeInBits() ==
23132             BCSrc.getOperand(0).getScalarValueSizeInBits()) {
23133       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23134       // trunc i64 X to i32
23135       SDValue X = BCSrc.getOperand(0);
23136       EVT XVT = X.getValueType();
23137       assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
23138              "Extract element and scalar to vector can't change element type "
23139              "from FP to integer.");
23140       unsigned XBitWidth = X.getValueSizeInBits();
23141       unsigned Scale = XBitWidth / VecEltBitWidth;
23142       BCTruncElt = IsLE ? 0 : Scale - 1;
23143 
23144       // An extract element return value type can be wider than its vector
23145       // operand element type. In that case, the high bits are undefined, so
23146       // it's possible that we may need to extend rather than truncate.
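      // An illustrative little-endian instance of the shift+trunc path below
      // (names hypothetical):
      //   ext_elt (v4i32 (bitcast (v2i64 scalar_to_vector i64:X))), 1
      //     --> i32 (trunc (srl X, 32))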
23147       if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
23148         assert(XBitWidth % VecEltBitWidth == 0 &&
23149                "Scalar bitwidth must be a multiple of vector element bitwidth");
23150 
23151         if (ExtractIndex != BCTruncElt) {
23152           unsigned ShiftIndex =
23153               IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23154           X = DAG.getNode(
23155               ISD::SRL, DL, XVT, X,
23156               DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23157         }
23158 
23159         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
23160       }
23161     }
23162   }
23163 
23164   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23165   // We only perform this optimization before the op legalization phase because
23166   // we may introduce new vector instructions which are not backed by TD
23167   // patterns. For example, on AVX we could end up extracting elements from a
23168   // wide vector without using extract_subvector. However, if we can find an
23169   // underlying scalar value, then we can always use that.
23170   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
23171     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
23172     // Find the new index to extract from.
23173     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23174 
23175     // Extracting an undef index is undef.
23176     if (OrigElt == -1)
23177       return DAG.getUNDEF(ScalarVT);
23178 
23179     // Select the right vector half to extract from.
23180     SDValue SVInVec;
23181     if (OrigElt < (int)NumElts) {
23182       SVInVec = VecOp.getOperand(0);
23183     } else {
23184       SVInVec = VecOp.getOperand(1);
23185       OrigElt -= NumElts;
23186     }
23187 
23188     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
23189       SDValue InOp = SVInVec.getOperand(OrigElt);
23190       if (InOp.getValueType() != ScalarVT) {
23191         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23192         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
23193       }
23194 
23195       return InOp;
23196     }
23197 
23198     // FIXME: We should handle recursing on other vector shuffles and
23199     // scalar_to_vector here as well.
23200 
23201     if (!LegalOperations ||
23202         // FIXME: Should really be just isOperationLegalOrCustom.
23203         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23204         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
23205       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
23206                          DAG.getVectorIdxConstant(OrigElt, DL));
23207     }
23208   }
23209 
23210   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
23211   // simplify it based on the (valid) extraction indices.
23212   if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23213         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23214                Use->getOperand(0) == VecOp &&
23215                isa<ConstantSDNode>(Use->getOperand(1));
23216       })) {
23217     APInt DemandedElts = APInt::getZero(NumElts);
23218     for (SDNode *User : VecOp->users()) {
23219       auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
23220       if (CstElt->getAPIntValue().ult(NumElts))
23221         DemandedElts.setBit(CstElt->getZExtValue());
23222     }
23223     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
23224       // We simplified the vector operand of this extract element. If this
23225       // extract is not dead, visit it again so it is folded properly.
23226       if (N->getOpcode() != ISD::DELETED_NODE)
23227         AddToWorklist(N);
23228       return SDValue(N, 0);
23229     }
23230     APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
23231     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
23232       // We simplified the vector operand of this extract element. If this
23233       // extract is not dead, visit it again so it is folded properly.
23234       if (N->getOpcode() != ISD::DELETED_NODE)
23235         AddToWorklist(N);
23236       return SDValue(N, 0);
23237     }
23238   }
23239 
23240   if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
23241     return SDValue(N, 0);
23242 
23243   // Everything under here is trying to match an extract of a loaded value.
23244   // If the result of the load has to be truncated, then it's not necessarily
23245   // profitable.
23246   bool BCNumEltsChanged = false;
23247   EVT ExtVT = VecVT.getVectorElementType();
23248   EVT LVT = ExtVT;
23249   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
23250     return SDValue();
23251 
23252   if (VecOp.getOpcode() == ISD::BITCAST) {
23253     // Don't duplicate a load with other uses.
23254     if (!VecOp.hasOneUse())
23255       return SDValue();
23256 
23257     EVT BCVT = VecOp.getOperand(0).getValueType();
23258     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
23259       return SDValue();
23260     if (NumElts != BCVT.getVectorNumElements())
23261       BCNumEltsChanged = true;
23262     VecOp = VecOp.getOperand(0);
23263     ExtVT = BCVT.getVectorElementType();
23264   }
23265 
23266   // extract (vector load $addr), i --> load $addr + i * size
23267   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
23268       ISD::isNormalLoad(VecOp.getNode()) &&
23269       !Index->hasPredecessor(VecOp.getNode())) {
23270     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
23271     if (VecLoad && VecLoad->isSimple())
23272       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
23273   }
23274 
23275   // Perform only after legalization to ensure build_vector / vector_shuffle
23276   // optimizations have already been done.
23277   if (!LegalOperations || !IndexC)
23278     return SDValue();
23279 
23280   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
23281   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
23282   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
23283   int Elt = IndexC->getZExtValue();
23284   LoadSDNode *LN0 = nullptr;
23285   if (ISD::isNormalLoad(VecOp.getNode())) {
23286     LN0 = cast<LoadSDNode>(VecOp);
23287   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23288              VecOp.getOperand(0).getValueType() == ExtVT &&
23289              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
23290     // Don't duplicate a load with other uses.
23291     if (!VecOp.hasOneUse())
23292       return SDValue();
23293 
23294     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
23295   }
23296   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
23297     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
23298     // =>
23299     // (load $addr+1*size)
23300 
23301     // Don't duplicate a load with other uses.
23302     if (!VecOp.hasOneUse())
23303       return SDValue();
23304 
23305     // If the bit convert changed the number of elements, it is unsafe
23306     // to examine the mask.
23307     if (BCNumEltsChanged)
23308       return SDValue();
23309 
23310     // Select the input vector, guarding against an out-of-range extract index.
23311     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
23312     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
23313 
23314     if (VecOp.getOpcode() == ISD::BITCAST) {
23315       // Don't duplicate a load with other uses.
23316       if (!VecOp.hasOneUse())
23317         return SDValue();
23318 
23319       VecOp = VecOp.getOperand(0);
23320     }
23321     if (ISD::isNormalLoad(VecOp.getNode())) {
23322       LN0 = cast<LoadSDNode>(VecOp);
23323       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
23324       Index = DAG.getConstant(Elt, DL, Index.getValueType());
23325     }
23326   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
23327              VecVT.getVectorElementType() == ScalarVT &&
23328              (!LegalTypes ||
23329               TLI.isTypeLegal(
23330                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
23331     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
23332     //      -> extract_vector_elt a, 0
23333     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
23334     //      -> extract_vector_elt a, 1
23335     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
23336     //      -> extract_vector_elt b, 0
23337     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
23338     //      -> extract_vector_elt b, 1
23339     EVT ConcatVT = VecOp.getOperand(0).getValueType();
23340     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
23341     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
23342                                      Index.getValueType());
23343 
23344     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
23345     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
23346                               ConcatVT.getVectorElementType(),
23347                               ConcatOp, NewIdx);
23348     return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
23349   }
23350 
23351   // Make sure we found a simple (non-volatile, non-atomic) load and that the
23352   // extractelement is its only use.
23353   if (!LN0 || !LN0->hasNUsesOfValue(1, 0) || !LN0->isSimple())
23354     return SDValue();
23355 
23356   // If Idx was -1 above, Elt is going to be -1, so just return undef.
23357   if (Elt == -1)
23358     return DAG.getUNDEF(LVT);
23359 
23360   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
23361 }
23362 
23363 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
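//
// A minimal sketch of the intended rewrite (illustrative types, little-endian):
//   v2i64 = build_vector (zext i32:a), (zext i32:b)
//     --> v2i64 = bitcast (v4i32 build_vector a, 0, b, 0)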
23364 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
23365   // We perform this optimization post type-legalization because
23366   // the type-legalizer often scalarizes integer-promoted vectors.
23367   // Performing this optimization earlier may create bit-casts which
23368   // will be type-legalized to complex code sequences.
23369   // We perform this optimization only before the operation legalizer because we
23370   // may introduce illegal operations.
23371   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23372     return SDValue();
23373 
23374   unsigned NumInScalars = N->getNumOperands();
23375   SDLoc DL(N);
23376   EVT VT = N->getValueType(0);
23377 
23378   // Check to see if this is a BUILD_VECTOR of a bunch of values
23379   // which come from any_extend or zero_extend nodes. If so, we can create
23380   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
23381   // optimizations. We do not handle sign-extend because we can't fill the sign
23382   // using shuffles.
23383   EVT SourceType = MVT::Other;
23384   bool AllAnyExt = true;
23385 
23386   for (unsigned i = 0; i != NumInScalars; ++i) {
23387     SDValue In = N->getOperand(i);
23388     // Ignore undef inputs.
23389     if (In.isUndef()) continue;
23390 
23391     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
23392     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
23393 
23394     // Abort if the element is not an extension.
23395     if (!ZeroExt && !AnyExt) {
23396       SourceType = MVT::Other;
23397       break;
23398     }
23399 
23400     // The input is a ZeroExt or AnyExt. Check the original type.
23401     EVT InTy = In.getOperand(0).getValueType();
23402 
23403     // Check that all of the widened source types are the same.
23404     if (SourceType == MVT::Other)
23405       // First time.
23406       SourceType = InTy;
23407     else if (InTy != SourceType) {
23408       // Multiple incoming types. Abort.
23409       SourceType = MVT::Other;
23410       break;
23411     }
23412 
23413     // Check if all of the extends are ANY_EXTENDs.
23414     AllAnyExt &= AnyExt;
23415   }
23416 
23417   // In order to have valid types, all of the inputs must be extended from the
23418   // same source type and all of the inputs must be any or zero extend.
23419   // Scalar sizes must be a power of two.
23420   EVT OutScalarTy = VT.getScalarType();
23421   bool ValidTypes =
23422       SourceType != MVT::Other &&
23423       llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
23424       llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
23425 
23426   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
23427   // turn into a single shuffle instruction.
23428   if (!ValidTypes)
23429     return SDValue();
23430 
23431   // If we already have a splat buildvector, then don't fold it if it means
23432   // introducing zeros.
23433   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
23434     return SDValue();
23435 
23436   bool isLE = DAG.getDataLayout().isLittleEndian();
23437   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
23438   assert(ElemRatio > 1 && "Invalid element size ratio");
23439   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
23440                                DAG.getConstant(0, DL, SourceType);
23441 
23442   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
23443   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
23444 
23445   // Populate the new build_vector
23446   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23447     SDValue Cast = N->getOperand(i);
23448     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
23449             Cast.getOpcode() == ISD::ZERO_EXTEND ||
23450             Cast.isUndef()) && "Invalid cast opcode");
23451     SDValue In;
23452     if (Cast.isUndef())
23453       In = DAG.getUNDEF(SourceType);
23454     else
23455       In = Cast->getOperand(0);
23456     unsigned Index = isLE ? (i * ElemRatio) :
23457                             (i * ElemRatio + (ElemRatio - 1));
23458 
23459     assert(Index < Ops.size() && "Invalid index");
23460     Ops[Index] = In;
23461   }
23462 
23463   // The type of the new BUILD_VECTOR node.
23464   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
23465   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
23466          "Invalid vector size");
23467   // Check if the new vector type is legal.
23468   if (!isTypeLegal(VecVT) ||
23469       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
23470        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
23471     return SDValue();
23472 
23473   // Make the new BUILD_VECTOR.
23474   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
23475 
23476   // The new BUILD_VECTOR node has the potential to be further optimized.
23477   AddToWorklist(BV.getNode());
23478   // Bitcast to the desired type.
23479   return DAG.getBitcast(VT, BV);
23480 }
23481 
23482 // Simplify (build_vec (trunc $1)
23483 //                     (trunc (srl $1 half-width))
23484 //                     (trunc (srl $1 (2 * half-width))))
23485 // to (bitcast $1)
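//
// E.g., a concrete little-endian instance (illustrative types only):
//   v4i16 = build_vector (trunc i64:x), (trunc (srl x, 16)),
//                        (trunc (srl x, 32)), (trunc (srl x, 48))
//     --> v4i16 = bitcast x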
23486 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
23487   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23488 
23489   EVT VT = N->getValueType(0);
23490 
23491   // Don't run this before LegalizeTypes if VT is legal.
23492   // Targets may have other preferences.
23493   if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
23494     return SDValue();
23495 
23496   // Only for little-endian layouts.
23497   if (!DAG.getDataLayout().isLittleEndian())
23498     return SDValue();
23499 
23500   SDLoc DL(N);
23501   EVT OutScalarTy = VT.getScalarType();
23502   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
23503 
23504   // Only for power-of-two types, to be sure that the bitcast works well.
23505   if (!isPowerOf2_64(ScalarTypeBitsize))
23506     return SDValue();
23507 
23508   unsigned NumInScalars = N->getNumOperands();
23509 
23510   // Look through bitcasts
23511   auto PeekThroughBitcast = [](SDValue Op) {
23512     if (Op.getOpcode() == ISD::BITCAST)
23513       return Op.getOperand(0);
23514     return Op;
23515   };
23516 
23517   // The source value from which all the parts are extracted.
23518   SDValue Src;
23519   for (unsigned i = 0; i != NumInScalars; ++i) {
23520     SDValue In = PeekThroughBitcast(N->getOperand(i));
23521     // Ignore undef inputs.
23522     if (In.isUndef()) continue;
23523 
23524     if (In.getOpcode() != ISD::TRUNCATE)
23525       return SDValue();
23526 
23527     In = PeekThroughBitcast(In.getOperand(0));
23528 
23529     if (In.getOpcode() != ISD::SRL) {
23530       // For now, handle only a build_vec without shuffling; handle shifts
23531       // here in the future.
23532       if (i != 0)
23533         return SDValue();
23534 
23535       Src = In;
23536     } else {
23537       // In is SRL
23538       SDValue part = PeekThroughBitcast(In.getOperand(0));
23539 
23540       if (!Src) {
23541         Src = part;
23542       } else if (Src != part) {
23543         // Vector parts do not stem from the same variable
23544         return SDValue();
23545       }
23546 
23547       SDValue ShiftAmtVal = In.getOperand(1);
23548       if (!isa<ConstantSDNode>(ShiftAmtVal))
23549         return SDValue();
23550 
23551       uint64_t ShiftAmt = In.getConstantOperandVal(1);
23552 
23553       // Bail if the value is not extracted at the expected position.
23554       if (ShiftAmt != i * ScalarTypeBitsize)
23555         return SDValue();
23556     }
23557   }
23558 
23559   // Only cast if the size is the same
23560   if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
23561     return SDValue();
23562 
23563   return DAG.getBitcast(VT, Src);
23564 }
23565 
23566 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
23567                                            ArrayRef<int> VectorMask,
23568                                            SDValue VecIn1, SDValue VecIn2,
23569                                            unsigned LeftIdx, bool DidSplitVec) {
23570   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
23571 
23572   EVT VT = N->getValueType(0);
23573   EVT InVT1 = VecIn1.getValueType();
23574   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
23575 
23576   unsigned NumElems = VT.getVectorNumElements();
23577   unsigned ShuffleNumElems = NumElems;
23578 
23579   // If we artificially split a vector in two already, then the offsets in the
23580   // operands will all be based off of VecIn1, even those in VecIn2.
23581   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
23582 
23583   uint64_t VTSize = VT.getFixedSizeInBits();
23584   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
23585   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
23586 
23587   assert(InVT2Size <= InVT1Size &&
23588          "Inputs must be sorted to be in non-increasing vector size order.");
23589 
23590   // We can't generate a shuffle node with mismatched input and output types.
23591   // Try to make the types match the type of the output.
23592   if (InVT1 != VT || InVT2 != VT) {
23593     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
23594       // If the output vector length is a multiple of both input lengths,
23595       // we can concatenate them and pad the rest with undefs.
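      // E.g. (illustrative): for VT = v8i32 and InVT1 = InVT2 = v2i32, this
      // emits concat_vectors VecIn1, VecIn2, undef, undef.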
23596       unsigned NumConcats = VTSize / InVT1Size;
23597       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
23598       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
23599       ConcatOps[0] = VecIn1;
23600       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
23601       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23602       VecIn2 = SDValue();
23603     } else if (InVT1Size == VTSize * 2) {
23604       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
23605         return SDValue();
23606 
23607       if (!VecIn2.getNode()) {
23608         // If we only have one input vector, and it's twice the size of the
23609         // output, split it in two.
23610         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23611                              DAG.getVectorIdxConstant(NumElems, DL));
23612         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23613         // Since we now have shorter input vectors, adjust the offset of the
23614         // second vector's start.
23615         Vec2Offset = NumElems;
23616       } else {
23617         assert(InVT2Size <= InVT1Size &&
23618                "Second input is not going to be larger than the first one.");
23619 
23620         // VecIn1 is wider than the output, and we have another, possibly
23621         // smaller input. Pad the smaller input with undefs, shuffle at the
23622         // input vector width, and extract the output.
23623         // The shuffle type is different than VT, so check legality again.
23624         if (LegalOperations &&
23625             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23626           return SDValue();
23627 
23628         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23629         // lower it back into a BUILD_VECTOR. So if the inserted type is
23630         // illegal, don't even try.
23631         if (InVT1 != InVT2) {
23632           if (!TLI.isTypeLegal(InVT2))
23633             return SDValue();
23634           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23635                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23636         }
23637         ShuffleNumElems = NumElems * 2;
23638       }
23639     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23640       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23641       ConcatOps[0] = VecIn2;
23642       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23643     } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23644       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23645           !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23646         return SDValue();
23647       // If the dest vector has fewer than two elements, then using a shuffle
23648       // and extract from larger regs will cost even more.
23649       if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23650         return SDValue();
23651       assert(InVT2Size <= InVT1Size &&
23652              "Second input is not going to be larger than the first one.");
23653 
23654       // VecIn1 is wider than the output, and we have another, possibly
23655       // smaller input. Pad the smaller input with undefs, shuffle at the
23656       // input vector width, and extract the output.
23657       // The shuffle type is different than VT, so check legality again.
23658       if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23659         return SDValue();
23660 
23661       if (InVT1 != InVT2) {
23662         VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23663                              DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23664       }
23665       ShuffleNumElems = InVT1Size / VTSize * NumElems;
23666     } else {
23667       // TODO: Support cases where the length mismatch isn't exactly by a
23668       // factor of 2.
23669       // TODO: Move this check upwards, so that if we have bad type
23670       // mismatches, we don't create any DAG nodes.
23671       return SDValue();
23672     }
23673   }
23674 
23675   // Initialize mask to undef.
23676   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23677 
23678   // Only need to run up to the number of elements actually used, not the
23679   // total number of elements in the shuffle - if we are shuffling a wider
23680   // vector, the high lanes should be set to undef.
23681   for (unsigned i = 0; i != NumElems; ++i) {
23682     if (VectorMask[i] <= 0)
23683       continue;
23684 
23685     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23686     if (VectorMask[i] == (int)LeftIdx) {
23687       Mask[i] = ExtIndex;
23688     } else if (VectorMask[i] == (int)LeftIdx + 1) {
23689       Mask[i] = Vec2Offset + ExtIndex;
23690     }
23691   }
23692 
23693   // The type of the input vectors may have changed above.
23694   InVT1 = VecIn1.getValueType();
23695 
23696   // If we already have a VecIn2, it should have the same type as VecIn1.
23697   // If we don't, get an undef vector of the appropriate type.
23698   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23699   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23700 
23701   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23702   if (ShuffleNumElems > NumElems)
23703     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23704 
23705   return Shuffle;
23706 }
23707 
23708 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23709   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23710 
23711   // First, determine where the build vector is not undef.
23712   // TODO: We could extend this to handle zero elements as well as undefs.
23713   int NumBVOps = BV->getNumOperands();
23714   int ZextElt = -1;
23715   for (int i = 0; i != NumBVOps; ++i) {
23716     SDValue Op = BV->getOperand(i);
23717     if (Op.isUndef())
23718       continue;
23719     if (ZextElt == -1)
23720       ZextElt = i;
23721     else
23722       return SDValue();
23723   }
23724   // Bail out if there's no non-undef element.
23725   if (ZextElt == -1)
23726     return SDValue();
23727 
23728   // The build vector contains some number of undef elements and exactly
23729   // one other element. That other element must be a zero-extended scalar
23730   // extracted from a vector at a constant index to turn this into a shuffle.
23731   // Also, require that the build vector does not implicitly truncate/extend
23732   // its elements.
23733   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
23734   EVT VT = BV->getValueType(0);
23735   SDValue Zext = BV->getOperand(ZextElt);
23736   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23737       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23738       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23739       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
23740     return SDValue();
23741 
23742   // The zero-extended size must be a multiple of the source size, and we must be
23743   // building a vector of the same size as the source of the extract element.
23744   SDValue Extract = Zext.getOperand(0);
23745   unsigned DestSize = Zext.getValueSizeInBits();
23746   unsigned SrcSize = Extract.getValueSizeInBits();
23747   if (DestSize % SrcSize != 0 ||
23748       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23749     return SDValue();
23750 
23751   // Create a shuffle mask that will combine the extracted element with zeros
23752   // and undefs.
23753   int ZextRatio = DestSize / SrcSize;
23754   int NumMaskElts = NumBVOps * ZextRatio;
23755   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23756   for (int i = 0; i != NumMaskElts; ++i) {
23757     if (i / ZextRatio == ZextElt) {
23758       // The low bits of the (potentially translated) extracted element map to
23759       // the source vector. The high bits map to zero. We will use a zero vector
23760       // as the 2nd source operand of the shuffle, so use the 1st element of
23761       // that vector (mask value is number-of-elements) for the high bits.
23762       int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23763       ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23764                                            : NumMaskElts;
23765     }
23766 
23767     // Undef elements of the build vector remain undef because we initialize
23768     // the shuffle mask with -1.
23769   }
23770 
23771   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23772   // bitcast (shuffle V, ZeroVec, VectorMask)
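  // E.g. (illustrative, little-endian):
  //   v2i64 = buildvec undef, (zext (extractelt v4i32:V, 2))
  //     --> bitcast (v4i32 shuffle V, zerovec, <u,u,2,4>)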
23773   SDLoc DL(BV);
23774   EVT VecVT = Extract.getOperand(0).getValueType();
23775   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23776   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23777   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23778                                              ZeroVec, ShufMask, DAG);
23779   if (!Shuf)
23780     return SDValue();
23781   return DAG.getBitcast(VT, Shuf);
23782 }
23783 
23784 // FIXME: promote to STLExtras.
23785 template <typename R, typename T>
23786 static auto getFirstIndexOf(R &&Range, const T &Val) {
23787   auto I = find(Range, Val);
23788   if (I == Range.end())
23789     return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23790   return std::distance(Range.begin(), I);
23791 }
23792 
23793 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23794 // operations. If the types of the vectors we're extracting from allow it,
23795 // turn this into a vector_shuffle node.
23796 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23797   SDLoc DL(N);
23798   EVT VT = N->getValueType(0);
23799 
23800   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23801   if (!isTypeLegal(VT))
23802     return SDValue();
23803 
23804   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23805     return V;
23806 
23807   // May only combine to shuffle after legalize if shuffle is legal.
23808   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23809     return SDValue();
23810 
23811   bool UsesZeroVector = false;
23812   unsigned NumElems = N->getNumOperands();
23813 
23814   // Record, for each element of the newly built vector, which input vector
23815   // that element comes from. -1 stands for undef, 0 for the zero vector,
23816   // and positive values for the input vectors.
23817   // VectorMask maps each element to its vector number, and VecIn maps vector
23818   // numbers to their initial SDValues.
23819 
23820   SmallVector<int, 8> VectorMask(NumElems, -1);
23821   SmallVector<SDValue, 8> VecIn;
23822   VecIn.push_back(SDValue());
23823 
23824   // If we have a single extract_element with a constant index, track the index
23825   // value.
23826   unsigned OneConstExtractIndex = ~0u;
23827 
23828   // Count the number of extract_vector_elt sources (neither constant nor undef).
23829   unsigned NumExtracts = 0;
23830 
23831   for (unsigned i = 0; i != NumElems; ++i) {
23832     SDValue Op = N->getOperand(i);
23833 
23834     if (Op.isUndef())
23835       continue;
23836 
23837     // See if we can use a blend with a zero vector.
23838     // TODO: Should we generalize this to a blend with an arbitrary constant
23839     // vector?
23840     if (isNullConstant(Op) || isNullFPConstant(Op)) {
23841       UsesZeroVector = true;
23842       VectorMask[i] = 0;
23843       continue;
23844     }
23845 
23846     // Not an undef or zero. If the input is something other than an
23847     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23848     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
23849       return SDValue();
23850 
23851     SDValue ExtractedFromVec = Op.getOperand(0);
23852     if (ExtractedFromVec.getValueType().isScalableVector())
23853       return SDValue();
23854     auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
23855     if (!ExtractIdx)
23856       return SDValue();
23857 
23858     if (ExtractIdx->getAsAPIntVal().uge(
23859             ExtractedFromVec.getValueType().getVectorNumElements()))
23860       return SDValue();
23861 
23862     // All inputs must have the same element type as the output.
23863     if (VT.getVectorElementType() !=
23864         ExtractedFromVec.getValueType().getVectorElementType())
23865       return SDValue();
23866 
23867     OneConstExtractIndex = ExtractIdx->getZExtValue();
23868     ++NumExtracts;
23869 
23870     // Have we seen this input vector before?
23871     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23872     // a map back from SDValues to numbers isn't worth it.
23873     int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23874     if (Idx == -1) { // A new source vector?
23875       Idx = VecIn.size();
23876       VecIn.push_back(ExtractedFromVec);
23877     }
23878 
23879     VectorMask[i] = Idx;
23880   }
23881 
23882   // If we didn't find at least one input vector, bail out.
23883   if (VecIn.size() < 2)
23884     return SDValue();
23885 
23886   // If all the operands of the BUILD_VECTOR extract from the same
23887   // vector, then split the vector efficiently based on the maximum
23888   // vector access index and adjust the VectorMask and
23889   // VecIn accordingly.
23890   bool DidSplitVec = false;
23891   if (VecIn.size() == 2) {
23892     // If we only found a single constant-indexed extract_vector_elt feeding the
23893     // build_vector (the other elements being constant or undef), do not produce
23894     // a more complicated shuffle if the extract is cheap. Skip broadcast
23895     // patterns with multiple uses in the build_vector.
23896 
23897     // TODO: This should be more aggressive about skipping the shuffle
23898     // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
23899     // index.
23900     if (NumExtracts == 1 &&
23901         TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
23902         TLI.isTypeLegal(VT.getVectorElementType()) &&
23903         TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
23904       return SDValue();
23905 
23906     unsigned MaxIndex = 0;
23907     unsigned NearestPow2 = 0;
23908     SDValue Vec = VecIn.back();
23909     EVT InVT = Vec.getValueType();
23910     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23911 
23912     for (unsigned i = 0; i < NumElems; i++) {
23913       if (VectorMask[i] <= 0)
23914         continue;
23915       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23916       IndexVec[i] = Index;
23917       MaxIndex = std::max(MaxIndex, Index);
23918     }
23919 
23920     NearestPow2 = PowerOf2Ceil(MaxIndex);
23921     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23922         NumElems * 2 < NearestPow2) {
23923       unsigned SplitSize = NearestPow2 / 2;
23924       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23925                                      InVT.getVectorElementType(), SplitSize);
23926       if (TLI.isTypeLegal(SplitVT) &&
23927           SplitSize + SplitVT.getVectorNumElements() <=
23928               InVT.getVectorNumElements()) {
23929         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23930                                      DAG.getVectorIdxConstant(SplitSize, DL));
23931         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23932                                      DAG.getVectorIdxConstant(0, DL));
23933         VecIn.pop_back();
23934         VecIn.push_back(VecIn1);
23935         VecIn.push_back(VecIn2);
23936         DidSplitVec = true;
23937 
23938         for (unsigned i = 0; i < NumElems; i++) {
23939           if (VectorMask[i] <= 0)
23940             continue;
23941           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23942         }
23943       }
23944     }
23945   }
23946 
23947   // Sort input vectors by decreasing vector element count,
23948   // while preserving the relative order of equally-sized vectors.
23949   // Note that we keep the first "implicit" zero vector entry as-is.
23950   SmallVector<SDValue, 8> SortedVecIn(VecIn);
23951   llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23952                     [](const SDValue &a, const SDValue &b) {
23953                       return a.getValueType().getVectorNumElements() >
23954                              b.getValueType().getVectorNumElements();
23955                     });
23956 
23957   // We now also need to rebuild the VectorMask, because it referenced element
23958   // order in VecIn, and we just sorted them.
23959   for (int &SourceVectorIndex : VectorMask) {
23960     if (SourceVectorIndex <= 0)
23961       continue;
23962     unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23963     assert(Idx > 0 && Idx < SortedVecIn.size() &&
23964            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23965     SourceVectorIndex = Idx;
23966   }
23967 
23968   VecIn = std::move(SortedVecIn);
23969 
23970   // TODO: Should this fire if some of the input vectors have an illegal type
23971   // (as it does now), or should we let legalization run its course first?
23972 
23973   // Shuffle phase:
23974   // Take pairs of vectors, and shuffle them so that the result has elements
23975   // from these vectors in the correct places.
23976   // For example, given:
23977   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23978   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23979   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23980   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23981   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23982   // We will generate:
23983   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23984   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23985   SmallVector<SDValue, 4> Shuffles;
23986   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23987     unsigned LeftIdx = 2 * In + 1;
23988     SDValue VecLeft = VecIn[LeftIdx];
23989     SDValue VecRight =
23990         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23991 
23992     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23993                                                 VecRight, LeftIdx, DidSplitVec))
23994       Shuffles.push_back(Shuffle);
23995     else
23996       return SDValue();
23997   }
23998 
23999   // If we need the zero vector as an "ingredient" in the blend tree, add it
24000   // to the list of shuffles.
24001   if (UsesZeroVector)
24002     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
24003                                       : DAG.getConstantFP(0.0, DL, VT));
24004 
24005   // If we only have one shuffle, we're done.
24006   if (Shuffles.size() == 1)
24007     return Shuffles[0];
24008 
24009   // Update the vector mask to point to the post-shuffle vectors.
24010   for (int &Vec : VectorMask)
24011     if (Vec == 0)
24012       Vec = Shuffles.size() - 1;
24013     else
24014       Vec = (Vec - 1) / 2;
24015 
24016   // More than one shuffle. Generate a binary tree of blends, e.g. if from
24017   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
24018   // generate:
24019   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
24020   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
24021   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
24022   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
24023   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
24024   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
24025   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
24026 
24027   // Make sure the initial size of the shuffle list is even.
24028   if (Shuffles.size() % 2)
24029     Shuffles.push_back(DAG.getUNDEF(VT));
24030 
24031   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
24032     if (CurSize % 2) {
24033       Shuffles[CurSize] = DAG.getUNDEF(VT);
24034       CurSize++;
24035     }
24036     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
24037       int Left = 2 * In;
24038       int Right = 2 * In + 1;
24039       SmallVector<int, 8> Mask(NumElems, -1);
24040       SDValue L = Shuffles[Left];
24041       ArrayRef<int> LMask;
24042       bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24043                            L.use_empty() && L.getOperand(1).isUndef() &&
24044                            L.getOperand(0).getValueType() == L.getValueType();
24045       if (IsLeftShuffle) {
24046         LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24047         L = L.getOperand(0);
24048       }
24049       SDValue R = Shuffles[Right];
24050       ArrayRef<int> RMask;
24051       bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24052                             R.use_empty() && R.getOperand(1).isUndef() &&
24053                             R.getOperand(0).getValueType() == R.getValueType();
24054       if (IsRightShuffle) {
24055         RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24056         R = R.getOperand(0);
24057       }
24058       for (unsigned I = 0; I != NumElems; ++I) {
24059         if (VectorMask[I] == Left) {
24060           Mask[I] = I;
24061           if (IsLeftShuffle)
24062             Mask[I] = LMask[I];
24063           VectorMask[I] = In;
24064         } else if (VectorMask[I] == Right) {
24065           Mask[I] = I + NumElems;
24066           if (IsRightShuffle)
24067             Mask[I] = RMask[I] + NumElems;
24068           VectorMask[I] = In;
24069         }
24070       }
24071 
24072       Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24073     }
24074   }
24075   return Shuffles[0];
24076 }
24077 
24078 // Try to turn a build vector of zero extends of extract vector elts into a
24079 // vector zero extend and possibly an extract subvector.
24080 // TODO: Support sign extend?
24081 // TODO: Allow undef elements?
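//
// A hypothetical match (the offset, 4, is a multiple of the result's element
// count):
//   v4i32 = build_vector (zext (ext_elt v8i16:V, 4)), (zext (ext_elt V, 5)),
//                        (zext (ext_elt V, 6)), (zext (ext_elt V, 7))
//     --> v4i32 = zero_extend (v4i16 extract_subvector V, 4)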
24082 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24083   if (LegalOperations)
24084     return SDValue();
24085 
24086   EVT VT = N->getValueType(0);
24087 
24088   bool FoundZeroExtend = false;
24089   SDValue Op0 = N->getOperand(0);
24090   auto checkElem = [&](SDValue Op) -> int64_t {
24091     unsigned Opc = Op.getOpcode();
24092     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24093     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24094         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24095         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24096       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24097         return C->getZExtValue();
24098     return -1;
24099   };
24100 
24101   // Make sure the first element matches
24102   // (zext (extract_vector_elt X, C))
24103   // Offset must be a constant multiple of the
24104   // known-minimum vector length of the result type.
24105   int64_t Offset = checkElem(Op0);
24106   if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24107     return SDValue();
24108 
24109   unsigned NumElems = N->getNumOperands();
24110   SDValue In = Op0.getOperand(0).getOperand(0);
24111   EVT InSVT = In.getValueType().getScalarType();
24112   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
24113 
24114   // Don't create an illegal input type after type legalization.
24115   if (LegalTypes && !TLI.isTypeLegal(InVT))
24116     return SDValue();
24117 
24118   // Ensure all the elements come from the same vector and are adjacent.
24119   for (unsigned i = 1; i != NumElems; ++i) {
24120     if ((Offset + i) != checkElem(N->getOperand(i)))
24121       return SDValue();
24122   }
24123 
24124   SDLoc DL(N);
24125   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
24126                    Op0.getOperand(0).getOperand(1));
24127   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
24128                      VT, In);
24129 }
24130 
24131 // If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND
24132 // and whose other elements are all constant zeros, granularize the
24133 // BUILD_VECTOR's element width, absorbing the ZERO_EXTEND and turning it into
24134 // a constant zero op. This pattern can appear during legalization.
24135 //
24136 // NOTE: This can be generalized to allow more than a single
24137 //       non-constant-zero op, UNDEF's, and to be KnownBits-based.
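// A minimal illustration, with made-up types, on a little-endian target
// (the trunc-of-zext that the code emits for the first element is assumed
// to fold away):
//   (v2i64 build_vector (i64 (zero_extend (i32 X))), (i64 0))
//     --> (v2i64 (bitcast (v4i32 build_vector X, 0, 0, 0)))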
24138 SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
24139   // Don't run this after legalization. Targets may have other preferences.
24140   if (Level >= AfterLegalizeDAG)
24141     return SDValue();
24142 
24143   // FIXME: support big-endian.
24144   if (DAG.getDataLayout().isBigEndian())
24145     return SDValue();
24146 
24147   EVT VT = N->getValueType(0);
24148   EVT OpVT = N->getOperand(0).getValueType();
24149   assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
24150 
24151   EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24152 
24153   if (!TLI.isTypeLegal(OpIntVT) ||
24154       (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
24155     return SDValue();
24156 
24157   unsigned EltBitwidth = VT.getScalarSizeInBits();
24158   // NOTE: the actual width of operands may be wider than that!
24159 
24160   // Analyze all operands of this BUILD_VECTOR. What is the largest number of
24161   // active bits they all have? We'll want to truncate them all to that width.
24162   unsigned ActiveBits = 0;
24163   APInt KnownZeroOps(VT.getVectorNumElements(), 0);
24164   for (auto I : enumerate(N->ops())) {
24165     SDValue Op = I.value();
24166     // FIXME: support UNDEF elements?
24167     if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
24168       unsigned OpActiveBits =
24169           Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24170       if (OpActiveBits == 0) {
24171         KnownZeroOps.setBit(I.index());
24172         continue;
24173       }
24174       // Profitability check: don't allow non-zero constant operands.
24175       return SDValue();
24176     }
24177     // Profitability check: there must only be a single non-zero operand,
24178     // and it must be the first operand of the BUILD_VECTOR.
24179     if (I.index() != 0)
24180       return SDValue();
24181     // The operand must be a zero-extension itself.
24182     // FIXME: this could be generalized to known leading zeros check.
24183     if (Op.getOpcode() != ISD::ZERO_EXTEND)
24184       return SDValue();
24185     unsigned CurrActiveBits =
24186         Op.getOperand(0).getValueSizeInBits().getFixedValue();
24187     assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24188     ActiveBits = CurrActiveBits;
24189     // We want to at least halve the element size.
24190     if (2 * ActiveBits > EltBitwidth)
24191       return SDValue();
24192   }
24193 
24194   // This BUILD_VECTOR must have at least one non-constant-zero operand.
24195   if (ActiveBits == 0)
24196     return SDValue();
24197 
24198   // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
24199   // into how many chunks can we split our element width?
24200   EVT NewScalarIntVT, NewIntVT;
24201   std::optional<unsigned> Factor;
24202   // We can split the element into at least two chunks, but not into more
24203   // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
24204   // that evenly divides the element width
24205   // and for which the resulting types/operations on that chunk width are legal.
24206   assert(2 * ActiveBits <= EltBitwidth &&
24207          "We know that half or less bits of the element are active.");
24208   for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24209     if (EltBitwidth % Scale != 0)
24210       continue;
24211     unsigned ChunkBitwidth = EltBitwidth / Scale;
24212     assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
24213     NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
24214     NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
24215                                 Scale * N->getNumOperands());
24216     if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
24217         (LegalOperations &&
24218          !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
24219            TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
24220       continue;
24221     Factor = Scale;
24222     break;
24223   }
24224   if (!Factor)
24225     return SDValue();
24226 
24227   SDLoc DL(N);
24228   SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
24229 
24230   // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
24231   SmallVector<SDValue, 16> NewOps;
24232   NewOps.reserve(NewIntVT.getVectorNumElements());
24233   for (auto I : enumerate(N->ops())) {
24234     SDValue Op = I.value();
24235     assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
24236     unsigned SrcOpIdx = I.index();
24237     if (KnownZeroOps[SrcOpIdx]) {
24238       NewOps.append(*Factor, ZeroOp);
24239       continue;
24240     }
24241     Op = DAG.getBitcast(OpIntVT, Op);
24242     Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
24243     NewOps.emplace_back(Op);
24244     NewOps.append(*Factor - 1, ZeroOp);
24245   }
24246   assert(NewOps.size() == NewIntVT.getVectorNumElements());
24247   SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
24248   NewBV = DAG.getBitcast(VT, NewBV);
24249   return NewBV;
24250 }
24251 
24252 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
24253   EVT VT = N->getValueType(0);
24254 
24255   // A vector built entirely of undefs is undef.
24256   if (ISD::allOperandsUndef(N))
24257     return DAG.getUNDEF(VT);
24258 
24259   // If this is a splat of a bitcast from another vector, change to a
24260   // concat_vector.
24261   // For example:
24262   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
24263   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
24264   //
24265   // If X is a build_vector itself, the concat can become a larger build_vector.
24266   // TODO: Maybe this is useful for non-splat too?
24267   if (!LegalOperations) {
24268     SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
24269     // Only change build_vector to a concat_vector if the splat value type is
24270     // same as the vector element type.
24271     if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
24272       Splat = peekThroughBitcasts(Splat);
24273       EVT SrcVT = Splat.getValueType();
24274       if (SrcVT.isVector()) {
24275         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
24276         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
24277                                      SrcVT.getVectorElementType(), NumElts);
24278         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
24279           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
24280           SDValue Concat =
24281               DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
24282           return DAG.getBitcast(VT, Concat);
24283         }
24284       }
24285     }
24286   }
24287 
24288   // Check if we can express the BUILD_VECTOR as a subvector extract.
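  // A minimal illustration, with made-up types (X is any v8i32 value):
  //   (v4i32 build_vector (extract_vector_elt (v8i32 X), 4), ...,
  //                       (extract_vector_elt (v8i32 X), 7))
  //     --> (v4i32 extract_subvector (v8i32 X), 4)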
24289   if (!LegalTypes && (N->getNumOperands() > 1)) {
24290     SDValue Op0 = N->getOperand(0);
24291     auto checkElem = [&](SDValue Op) -> uint64_t {
24292       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
24293           (Op0.getOperand(0) == Op.getOperand(0)))
24294         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
24295           return CNode->getZExtValue();
24296       return -1;
24297     };
24298 
24299     int Offset = checkElem(Op0);
24300     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
24301       if (Offset + i != checkElem(N->getOperand(i))) {
24302         Offset = -1;
24303         break;
24304       }
24305     }
24306 
24307     if ((Offset == 0) &&
24308         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
24309       return Op0.getOperand(0);
24310     if ((Offset != -1) &&
24311         ((Offset % N->getValueType(0).getVectorNumElements()) ==
24312          0)) // The index must be a multiple of the result's element count.
24313       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
24314                          Op0.getOperand(0), Op0.getOperand(1));
24315   }
24316 
24317   if (SDValue V = convertBuildVecZextToZext(N))
24318     return V;
24319 
24320   if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
24321     return V;
24322 
24323   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
24324     return V;
24325 
24326   if (SDValue V = reduceBuildVecTruncToBitCast(N))
24327     return V;
24328 
24329   if (SDValue V = reduceBuildVecToShuffle(N))
24330     return V;
24331 
24332   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
24333   // Do this late as some of the above may replace the splat.
24334   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
24335     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
24336       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
24337       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
24338     }
24339 
24340   return SDValue();
24341 }
24342 
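// A minimal sketch of what combineConcatVectorOfScalars below aims for, with
// made-up types (A and B are scalar i64 values):
//   concat_vectors (v2i32 (bitcast (i64 A))), (v2i32 (bitcast (i64 B)))
//     --> (v4i32 (bitcast (v2i64 build_vector A, B)))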
24343 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
24344   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24345   EVT OpVT = N->getOperand(0).getValueType();
24346 
24347   // If the operands are legal vectors, leave them alone.
24348   if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
24349     return SDValue();
24350 
24351   SDLoc DL(N);
24352   EVT VT = N->getValueType(0);
24353   SmallVector<SDValue, 8> Ops;
24354   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24355 
24356   // Keep track of what we encounter.
24357   EVT AnyFPVT;
24358 
24359   for (const SDValue &Op : N->ops()) {
24360     if (ISD::BITCAST == Op.getOpcode() &&
24361         !Op.getOperand(0).getValueType().isVector())
24362       Ops.push_back(Op.getOperand(0));
24363     else if (ISD::UNDEF == Op.getOpcode())
24364       Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
24365     else
24366       return SDValue();
24367 
24368     // Note whether we encounter an integer or floating point scalar.
24369     // If it's neither, bail out, it could be something weird like x86mmx.
24370     EVT LastOpVT = Ops.back().getValueType();
24371     if (LastOpVT.isFloatingPoint())
24372       AnyFPVT = LastOpVT;
24373     else if (!LastOpVT.isInteger())
24374       return SDValue();
24375   }
24376 
24377   // If any of the operands is a floating point scalar bitcast to a vector,
24378   // use floating point types throughout, and bitcast everything.
24379   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
24380   if (AnyFPVT != EVT()) {
24381     SVT = AnyFPVT;
24382     for (SDValue &Op : Ops) {
24383       if (Op.getValueType() == SVT)
24384         continue;
24385       if (Op.isUndef())
24386         Op = DAG.getNode(ISD::UNDEF, DL, SVT);
24387       else
24388         Op = DAG.getBitcast(SVT, Op);
24389     }
24390   }
24391 
24392   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
24393                                VT.getSizeInBits() / SVT.getSizeInBits());
24394   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
24395 }
24396 
24397 // Attempt to merge nested concat_vectors/undefs.
24398 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
24399 //  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
24400 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
24401                                                   SelectionDAG &DAG) {
24402   EVT VT = N->getValueType(0);
24403 
24404   // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
24405   EVT SubVT;
24406   SDValue FirstConcat;
24407   for (const SDValue &Op : N->ops()) {
24408     if (Op.isUndef())
24409       continue;
24410     if (Op.getOpcode() != ISD::CONCAT_VECTORS)
24411       return SDValue();
24412     if (!FirstConcat) {
24413       SubVT = Op.getOperand(0).getValueType();
24414       if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
24415         return SDValue();
24416       FirstConcat = Op;
24417       continue;
24418     }
24419     if (SubVT != Op.getOperand(0).getValueType())
24420       return SDValue();
24421   }
24422   assert(FirstConcat && "Concat of all-undefs found");
24423 
24424   SmallVector<SDValue> ConcatOps;
24425   for (const SDValue &Op : N->ops()) {
24426     if (Op.isUndef()) {
24427       ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
24428       continue;
24429     }
24430     ConcatOps.append(Op->op_begin(), Op->op_end());
24431   }
24432   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
24433 }
24434 
24435 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
24436 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
24437 // most two distinct vectors the same size as the result, attempt to turn this
24438 // into a legal shuffle.
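// For example, with made-up types (assuming the resulting mask is legal):
//   concat_vectors (v4i32 extract_subvector (v8i32 X), 0),
//                  (v4i32 extract_subvector (v8i32 Y), 4)
//     --> (v8i32 vector_shuffle<0,1,2,3,12,13,14,15> X, Y)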
24439 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
24440   EVT VT = N->getValueType(0);
24441   EVT OpVT = N->getOperand(0).getValueType();
24442 
24443   // We currently can't generate an appropriate shuffle for a scalable vector.
24444   if (VT.isScalableVector())
24445     return SDValue();
24446 
24447   int NumElts = VT.getVectorNumElements();
24448   int NumOpElts = OpVT.getVectorNumElements();
24449 
24450   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
24451   SmallVector<int, 8> Mask;
24452 
24453   for (SDValue Op : N->ops()) {
24454     Op = peekThroughBitcasts(Op);
24455 
24456     // UNDEF nodes convert to UNDEF shuffle mask values.
24457     if (Op.isUndef()) {
24458       Mask.append((unsigned)NumOpElts, -1);
24459       continue;
24460     }
24461 
24462     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24463       return SDValue();
24464 
24465     // What vector are we extracting the subvector from and at what index?
24466     SDValue ExtVec = Op.getOperand(0);
24467     int ExtIdx = Op.getConstantOperandVal(1);
24468 
24469     // We want the EVT of the original extraction to correctly scale the
24470     // extraction index.
24471     EVT ExtVT = ExtVec.getValueType();
24472     ExtVec = peekThroughBitcasts(ExtVec);
24473 
24474     // UNDEF nodes convert to UNDEF shuffle mask values.
24475     if (ExtVec.isUndef()) {
24476       Mask.append((unsigned)NumOpElts, -1);
24477       continue;
24478     }
24479 
24480     // Ensure that we are extracting a subvector from a vector the same
24481     // size as the result.
24482     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
24483       return SDValue();
24484 
24485     // Scale the subvector index to account for any bitcast.
24486     int NumExtElts = ExtVT.getVectorNumElements();
24487     if (0 == (NumExtElts % NumElts))
24488       ExtIdx /= (NumExtElts / NumElts);
24489     else if (0 == (NumElts % NumExtElts))
24490       ExtIdx *= (NumElts / NumExtElts);
24491     else
24492       return SDValue();
24493 
24494     // At most we can reference 2 inputs in the final shuffle.
24495     if (SV0.isUndef() || SV0 == ExtVec) {
24496       SV0 = ExtVec;
24497       for (int i = 0; i != NumOpElts; ++i)
24498         Mask.push_back(i + ExtIdx);
24499     } else if (SV1.isUndef() || SV1 == ExtVec) {
24500       SV1 = ExtVec;
24501       for (int i = 0; i != NumOpElts; ++i)
24502         Mask.push_back(i + ExtIdx + NumElts);
24503     } else {
24504       return SDValue();
24505     }
24506   }
24507 
24508   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24509   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
24510                                      DAG.getBitcast(VT, SV1), Mask, DAG);
24511 }
24512 
24513 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
24514   unsigned CastOpcode = N->getOperand(0).getOpcode();
24515   switch (CastOpcode) {
24516   case ISD::SINT_TO_FP:
24517   case ISD::UINT_TO_FP:
24518   case ISD::FP_TO_SINT:
24519   case ISD::FP_TO_UINT:
24520     // TODO: Allow more opcodes?
24521     //  case ISD::BITCAST:
24522     //  case ISD::TRUNCATE:
24523     //  case ISD::ZERO_EXTEND:
24524     //  case ISD::SIGN_EXTEND:
24525     //  case ISD::FP_EXTEND:
24526     break;
24527   default:
24528     return SDValue();
24529   }
24530 
24531   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
24532   if (!SrcVT.isVector())
24533     return SDValue();
24534 
24535   // All operands of the concat must be the same kind of cast from the same
24536   // source type.
24537   SmallVector<SDValue, 4> SrcOps;
24538   for (SDValue Op : N->ops()) {
24539     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
24540         Op.getOperand(0).getValueType() != SrcVT)
24541       return SDValue();
24542     SrcOps.push_back(Op.getOperand(0));
24543   }
24544 
24545   // The wider cast must be supported by the target. This is unusual because
24546   // the type used to check operation legality depends on the opcode. In
24547   // addition, check the other type in the cast to make sure this is really legal.
24548   EVT VT = N->getValueType(0);
24549   EVT SrcEltVT = SrcVT.getVectorElementType();
24550   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24551   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
24552   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24553   switch (CastOpcode) {
24554   case ISD::SINT_TO_FP:
24555   case ISD::UINT_TO_FP:
24556     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
24557         !TLI.isTypeLegal(VT))
24558       return SDValue();
24559     break;
24560   case ISD::FP_TO_SINT:
24561   case ISD::FP_TO_UINT:
24562     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
24563         !TLI.isTypeLegal(ConcatSrcVT))
24564       return SDValue();
24565     break;
24566   default:
24567     llvm_unreachable("Unexpected cast opcode");
24568   }
24569 
24570   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
24571   SDLoc DL(N);
24572   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
24573   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
24574 }
24575 
24576 // See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
24577 // the operands is a SHUFFLE_VECTOR, and all other operands are also operands
24578 // to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
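// For example, with made-up types (assuming the combined mask is legal):
//   concat_vectors (v4i32 vector_shuffle<1,0,3,2> X, undef), (v4i32 X)
//     --> (v8i32 vector_shuffle<1,0,3,2,0,1,2,3>
//                (concat_vectors X, undef), undef)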
24579 static SDValue combineConcatVectorOfShuffleAndItsOperands(
24580     SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24581     bool LegalOperations) {
24582   EVT VT = N->getValueType(0);
24583   EVT OpVT = N->getOperand(0).getValueType();
24584   if (VT.isScalableVector())
24585     return SDValue();
24586 
24587   // For now, only allow simple 2-operand concatenations.
24588   if (N->getNumOperands() != 2)
24589     return SDValue();
24590 
24591   // Don't create illegal types/shuffles when not allowed to.
24592   if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
24593       (LegalOperations &&
24594        !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
24595     return SDValue();
24596 
24597   // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
24598   // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
24599   // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
24600   // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
24601   // (4) and for now, the SHUFFLE_VECTOR must be unary.
24602   ShuffleVectorSDNode *SVN = nullptr;
24603   for (SDValue Op : N->ops()) {
24604     if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
24605         CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24606         all_of(N->ops(), [CurSVN](SDValue Op) {
24607           // FIXME: can we allow UNDEF operands?
24608           return !Op.isUndef() &&
24609                  (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24610         })) {
24611       SVN = CurSVN;
24612       break;
24613     }
24614   }
24615   if (!SVN)
24616     return SDValue();
24617 
24618   // We are going to pad the shuffle operands, so any index that was picking
24619   // from the second operand must be adjusted.
24620   SmallVector<int, 16> AdjustedMask;
24621   AdjustedMask.reserve(SVN->getMask().size());
24622   assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24623   append_range(AdjustedMask, SVN->getMask());
24624 
24625   // Identity masks for the operands of the (padded) shuffle.
24626   SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
24627   MutableArrayRef<int> FirstShufOpIdentityMask =
24628       MutableArrayRef<int>(IdentityMask)
24629           .take_front(OpVT.getVectorNumElements());
24630   MutableArrayRef<int> SecondShufOpIdentityMask =
24631       MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
24632   std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24633   std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24634             VT.getVectorNumElements());
24635 
24636   // New combined shuffle mask.
24637   SmallVector<int, 32> Mask;
24638   Mask.reserve(VT.getVectorNumElements());
24639   for (SDValue Op : N->ops()) {
24640     assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24641     if (Op.getNode() == SVN) {
24642       append_range(Mask, AdjustedMask);
24643       continue;
24644     }
24645     if (Op == SVN->getOperand(0)) {
24646       append_range(Mask, FirstShufOpIdentityMask);
24647       continue;
24648     }
24649     if (Op == SVN->getOperand(1)) {
24650       append_range(Mask, SecondShufOpIdentityMask);
24651       continue;
24652     }
24653     llvm_unreachable("Unexpected operand!");
24654   }
24655 
24656   // Don't create illegal shuffle masks.
24657   if (!TLI.isShuffleMaskLegal(Mask, VT))
24658     return SDValue();
24659 
24660   // Pad the shuffle operands with UNDEF.
24661   SDLoc dl(N);
24662   std::array<SDValue, 2> ShufOps;
24663   for (auto I : zip(SVN->ops(), ShufOps)) {
24664     SDValue ShufOp = std::get<0>(I);
24665     SDValue &NewShufOp = std::get<1>(I);
24666     if (ShufOp.isUndef())
24667       NewShufOp = DAG.getUNDEF(VT);
24668     else {
24669       SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24670                                           DAG.getUNDEF(OpVT));
24671       ShufOpParts[0] = ShufOp;
24672       NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24673     }
24674   }
24675   // Finally, create the new wide shuffle.
24676   return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24677 }
24678 
24679 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24680   // If we only have one input vector, we don't need to do any concatenation.
24681   if (N->getNumOperands() == 1)
24682     return N->getOperand(0);
24683 
24684   // Check if all of the operands are undefs.
24685   EVT VT = N->getValueType(0);
24686   if (ISD::allOperandsUndef(N))
24687     return DAG.getUNDEF(VT);
24688 
24689   // Optimize concat_vectors where all but the first of the vectors are undef.
24690   if (all_of(drop_begin(N->ops()),
24691              [](const SDValue &Op) { return Op.isUndef(); })) {
24692     SDValue In = N->getOperand(0);
24693     assert(In.getValueType().isVector() && "Must concat vectors");
24694 
24695     // If the input is a concat_vectors, just make a larger concat by padding
24696     // with smaller undefs.
24697     //
24698     // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24699     // here could cause an infinite loop. That legalizing happens when LegalDAG
24700     // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24701     // scalable.
24702     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24703         !(LegalDAG && In.getValueType().isScalableVector())) {
24704       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24705       SmallVector<SDValue, 4> Ops(In->ops());
24706       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24707       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24708     }
24709 
24710     SDValue Scalar = peekThroughOneUseBitcasts(In);
24711 
24712     // concat_vectors(scalar_to_vector(scalar), undef) ->
24713     //     scalar_to_vector(scalar)
24714     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24715          Scalar.hasOneUse()) {
24716       EVT SVT = Scalar.getValueType().getVectorElementType();
24717       if (SVT == Scalar.getOperand(0).getValueType())
24718         Scalar = Scalar.getOperand(0);
24719     }
24720 
24721     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24722     if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24723       // If the bitcast type isn't legal, it might be a trunc of a legal type;
24724       // look through the trunc so we can still do the transform:
24725       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24726       if (Scalar->getOpcode() == ISD::TRUNCATE &&
24727           !TLI.isTypeLegal(Scalar.getValueType()) &&
24728           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24729         Scalar = Scalar->getOperand(0);
24730 
24731       EVT SclTy = Scalar.getValueType();
24732 
24733       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24734         return SDValue();
24735 
24736       // Bail out if the vector size is not a multiple of the scalar size.
24737       if (VT.getSizeInBits() % SclTy.getSizeInBits())
24738         return SDValue();
24739 
24740       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24741       if (VNTNumElms < 2)
24742         return SDValue();
24743 
24744       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24745       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24746         return SDValue();
24747 
24748       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24749       return DAG.getBitcast(VT, Res);
24750     }
24751   }
24752 
24753   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24754   // We have already tested above for an UNDEF only concatenation.
24755   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24756   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24757   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24758     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24759   };
24760   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24761     SmallVector<SDValue, 8> Opnds;
24762     EVT SVT = VT.getScalarType();
24763 
24764     EVT MinVT = SVT;
24765     if (!SVT.isFloatingPoint()) {
24766       // If the BUILD_VECTORs are built from integers, they may have different
24767       // operand types. Get the smallest type and truncate all operands to it.
24768       bool FoundMinVT = false;
24769       for (const SDValue &Op : N->ops())
24770         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24771           EVT OpSVT = Op.getOperand(0).getValueType();
24772           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24773           FoundMinVT = true;
24774         }
24775       assert(FoundMinVT && "Concat vector type mismatch");
24776     }
24777 
24778     for (const SDValue &Op : N->ops()) {
24779       EVT OpVT = Op.getValueType();
24780       unsigned NumElts = OpVT.getVectorNumElements();
24781 
24782       if (ISD::UNDEF == Op.getOpcode())
24783         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24784 
24785       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24786         if (SVT.isFloatingPoint()) {
24787           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24788           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24789         } else {
24790           for (unsigned i = 0; i != NumElts; ++i)
24791             Opnds.push_back(
24792                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24793         }
24794       }
24795     }
24796 
24797     assert(VT.getVectorNumElements() == Opnds.size() &&
24798            "Concat vector type mismatch");
24799     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24800   }
24801 
24802   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24803   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24804   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24805     return V;
24806 
24807   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24808     // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24809     if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24810       return V;
24811 
24812     // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24813     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24814       return V;
24815   }
24816 
24817   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24818     return V;
24819 
24820   if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24821           N, DAG, TLI, LegalTypes, LegalOperations))
24822     return V;
24823 
24824   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24825   // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24826   // operands and look for CONCAT operations that place the incoming vectors
24827   // at the exact same location.
24828   //
24829   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
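  // For example, with made-up types (X is any v8i32 value):
  //   concat_vectors (v4i32 extract_subvector (v8i32 X), 0),
  //                  (v4i32 extract_subvector (v8i32 X), 4)
  //     --> (v8i32 X)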
24830   SDValue SingleSource = SDValue();
24831   unsigned PartNumElem =
24832       N->getOperand(0).getValueType().getVectorMinNumElements();
24833 
24834   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24835     SDValue Op = N->getOperand(i);
24836 
24837     if (Op.isUndef())
24838       continue;
24839 
24840     // Check if this is the identity extract:
24841     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24842       return SDValue();
24843 
24844     // Find the single incoming vector for the extract_subvector.
24845     if (SingleSource.getNode()) {
24846       if (Op.getOperand(0) != SingleSource)
24847         return SDValue();
24848     } else {
24849       SingleSource = Op.getOperand(0);
24850 
24851       // Check that the source type is the same as the type of the result.
24852       // If not, this concat may extend the vector, so we cannot
24853       // optimize it away.
24854       if (SingleSource.getValueType() != N->getValueType(0))
24855         return SDValue();
24856     }
24857 
24858     // Check that we are reading from the identity index.
24859     unsigned IdentityIndex = i * PartNumElem;
24860     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24861       return SDValue();
24862   }
24863 
24864   if (SingleSource.getNode())
24865     return SingleSource;
24866 
24867   return SDValue();
24868 }
24869 
24870 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24871 // if the subvector can be sourced for free.
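// For example, with made-up types, SubVT = v4i32 and Index = 4:
//   (v8i32 insert_subvector ?, (v4i32 Y), 4)       --> Y
//   (v8i32 concat_vectors (v4i32 A), (v4i32 B))    --> B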
24872 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24873   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24874       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24875     return V.getOperand(1);
24876   }
24877   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24878   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24879       V.getOperand(0).getValueType() == SubVT &&
24880       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24881     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24882     return V.getOperand(SubIdx);
24883   }
24884   return SDValue();
24885 }
24886 
24887 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24888                                               SelectionDAG &DAG,
24889                                               bool LegalOperations) {
24890   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24891   SDValue BinOp = Extract->getOperand(0);
24892   unsigned BinOpcode = BinOp.getOpcode();
24893   if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24894     return SDValue();
24895 
24896   EVT VecVT = BinOp.getValueType();
24897   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24898   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24899     return SDValue();
24900 
24901   SDValue Index = Extract->getOperand(1);
24902   EVT SubVT = Extract->getValueType(0);
24903   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24904     return SDValue();
24905 
24906   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24907   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24908 
24909   // TODO: We could handle the case where only 1 operand is being inserted by
24910   //       creating an extract of the other operand, but that requires checking
24911   //       number of uses and/or costs.
24912   if (!Sub0 || !Sub1)
24913     return SDValue();
24914 
24915   // We are inserting both operands of the wide binop only to extract back
24916   // to the narrow vector size. Eliminate all of the insert/extract:
24917   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24918   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24919                      BinOp->getFlags());
24920 }
24921 
24922 /// If we are extracting a subvector produced by a wide binary operator try
24923 /// to use a narrow binary operator and/or avoid concatenation and extraction.
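/// For example, with made-up v8i32/v4i32 types (the motivating case is
/// 256-bit bitwise logic on an AVX1-like target):
///   extract_subvector (v8i32 and (concat_vectors X, Y), Z), 4
///     --> (v4i32 and Y, (extract_subvector Z, 4))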
24924 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24925                                           bool LegalOperations) {
24926   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24927   // some of these bailouts with other transforms.
24928 
24929   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24930     return V;
24931 
24932   // The extract index must be a constant, so we can map it to a concat operand.
24933   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24934   if (!ExtractIndexC)
24935     return SDValue();
24936 
24937   // We are looking for an optionally bitcasted wide vector binary operator
24938   // feeding an extract subvector.
24939   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24940   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24941   unsigned BOpcode = BinOp.getOpcode();
24942   if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24943     return SDValue();
24944 
24945   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24946   // reduced to the unary fneg when it is visited, and we probably want to deal
24947   // with fneg in a target-specific way.
24948   if (BOpcode == ISD::FSUB) {
24949     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24950     if (C && C->getValueAPF().isNegZero())
24951       return SDValue();
24952   }
24953 
24954   // The binop must be a vector type, so we can extract some fraction of it.
24955   EVT WideBVT = BinOp.getValueType();
24956   // The optimisations below currently assume we are dealing with fixed length
24957   // vectors. It is possible to add support for scalable vectors, but at the
24958   // moment we've done no analysis to prove whether they are profitable or not.
24959   if (!WideBVT.isFixedLengthVector())
24960     return SDValue();
24961 
24962   EVT VT = Extract->getValueType(0);
24963   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24964   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24965          "Extract index is not a multiple of the vector length.");
24966 
24967   // Bail out if this is not a proper multiple width extraction.
24968   unsigned WideWidth = WideBVT.getSizeInBits();
24969   unsigned NarrowWidth = VT.getSizeInBits();
24970   if (WideWidth % NarrowWidth != 0)
24971     return SDValue();
24972 
24973   // Bail out if we are extracting a fraction of a single operation. This can
24974   // occur because we potentially looked through a bitcast of the binop.
24975   unsigned NarrowingRatio = WideWidth / NarrowWidth;
24976   unsigned WideNumElts = WideBVT.getVectorNumElements();
24977   if (WideNumElts % NarrowingRatio != 0)
24978     return SDValue();
24979 
24980   // Bail out if the target does not support a narrower version of the binop.
24981   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24982                                    WideNumElts / NarrowingRatio);
24983   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24984                                              LegalOperations))
24985     return SDValue();
24986 
24987   // If extraction is cheap, we don't need to look at the binop operands
24988   // for concat ops. The narrow binop alone makes this transform profitable.
24989   // We can't just reuse the original extract index operand because we may have
24990   // bitcasted.
24991   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24992   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24993   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24994       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24995     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24996     SDLoc DL(Extract);
24997     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24998     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24999                             BinOp.getOperand(0), NewExtIndex);
25000     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25001                             BinOp.getOperand(1), NewExtIndex);
25002     SDValue NarrowBinOp =
25003         DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25004     return DAG.getBitcast(VT, NarrowBinOp);
25005   }
25006 
25007   // Only handle the case where we are doubling and then halving. A larger ratio
25008   // may require more than two narrow binops to replace the wide binop.
25009   if (NarrowingRatio != 2)
25010     return SDValue();
25011 
25012   // TODO: The motivating case for this transform is an x86 AVX1 target. That
25013   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25014   // flavors, but no other 256-bit integer support. This could be extended to
25015   // handle any binop, but that may require fixing/adding other folds to avoid
25016   // codegen regressions.
25017   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
25018     return SDValue();
25019 
25020   // We need at least one concatenation operation of a binop operand to make
25021   // this transform worthwhile. The concat must double the input vector sizes.
25022   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25023     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
25024       return V.getOperand(ConcatOpNum);
25025     return SDValue();
25026   };
25027   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
25028   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
25029 
25030   if (SubVecL || SubVecR) {
25031     // If a binop operand was not the result of a concat, we must extract a
25032     // half-sized operand for our new narrow binop:
25033     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25034     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25035     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25036     SDLoc DL(Extract);
25037     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25038     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
25039                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25040                                       BinOp.getOperand(0), IndexC);
25041 
25042     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25043                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25044                                       BinOp.getOperand(1), IndexC);
25045 
25046     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25047     return DAG.getBitcast(VT, NarrowBinOp);
25048   }
25049 
25050   return SDValue();
25051 }
25052 
25053 /// If we are extracting a subvector from a wide vector load, convert to a
25054 /// narrow load to eliminate the extraction:
25055 /// (extract_subvector (load wide vector)) --> (load narrow vector)
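/// For example, with made-up types on a little-endian target:
///   (v2i64 extract_subvector (v4i64 load %p), 2)
///     --> (v2i64 load %p + 16 bytes)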
25056 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
25057   // TODO: Add support for big-endian. The offset calculation must be adjusted.
25058   if (DAG.getDataLayout().isBigEndian())
25059     return SDValue();
25060 
25061   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
25062   if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
25063     return SDValue();
25064 
25065   // Allow targets to opt-out.
25066   EVT VT = Extract->getValueType(0);
25067 
25068   // We can only create byte sized loads.
25069   if (!VT.isByteSized())
25070     return SDValue();
25071 
25072   unsigned Index = Extract->getConstantOperandVal(1);
25073   unsigned NumElts = VT.getVectorMinNumElements();
25074   // A fixed length vector being extracted from a scalable vector
25075   // may not be any *smaller* than the scalable one.
25076   if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25077     return SDValue();
25078 
25079   // The definition of EXTRACT_SUBVECTOR states that the index must be a
25080   // multiple of the minimum number of elements in the result type.
25081   assert(Index % NumElts == 0 && "The extract subvector index is not a "
25082                                  "multiple of the result's element count");
25083 
25084   // It's fine to use TypeSize here as we know the offset will not be negative.
25085   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
25086 
25087   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25088   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
25089     return SDValue();
25090 
25091   // The narrow load will be offset from the base address of the old load if
25092   // we are extracting from something besides index 0 (little-endian).
25093   SDLoc DL(Extract);
25094 
25095   // TODO: Use "BaseIndexOffset" to make this more effective.
25096   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25097 
25098   LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
25099   MachineFunction &MF = DAG.getMachineFunction();
25100   MachineMemOperand *MMO;
25101   if (Offset.isScalable()) {
25102     MachinePointerInfo MPI =
25103         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25104     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
25105   } else
25106     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25107                                   StoreSize);
25108 
25109   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25110   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
25111   return NewLd;
25112 }
25113 
25114 /// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
25115 /// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
25116 ///                               EXTRACT_SUBVECTOR(Op?, ?),
25117 ///                               Mask')
25118 /// iff it is legal and profitable to do so. Notably, the trimmed mask
25119 /// (containing only the elements that are extracted)
25120 /// must reference at most two subvectors.
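/// For example, with made-up types (assuming the new mask is legal):
///   (v4i32 extract_subvector
///       (v8i32 vector_shuffle<8,9,2,3,u,u,u,u> X, Y), 0)
///     --> (v4i32 vector_shuffle<0,1,6,7> (extract_subvector Y, 0),
///                                        (extract_subvector X, 0))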
25121 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
25122                                                      SelectionDAG &DAG,
25123                                                      const TargetLowering &TLI,
25124                                                      bool LegalOperations) {
25125   assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25126          "Must only be called on EXTRACT_SUBVECTOR's");
25127 
25128   SDValue N0 = N->getOperand(0);
25129 
25130   // Only deal with non-scalable vectors.
25131   EVT NarrowVT = N->getValueType(0);
25132   EVT WideVT = N0.getValueType();
25133   if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
25134     return SDValue();
25135 
25136   // The operand must be a shufflevector.
25137   auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
25138   if (!WideShuffleVector)
25139     return SDValue();
25140 
25141   // The old shuffle needs to go away.
25142   if (!WideShuffleVector->hasOneUse())
25143     return SDValue();
25144 
25145   // And the narrow shufflevector that we'll form must be legal.
25146   if (LegalOperations &&
25147       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
25148     return SDValue();
25149 
25150   uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
25151   int NumEltsExtracted = NarrowVT.getVectorNumElements();
25152   assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
25153          "Extract index is not a multiple of the output vector length.");
25154 
25155   int WideNumElts = WideVT.getVectorNumElements();
25156 
25157   SmallVector<int, 16> NewMask;
25158   NewMask.reserve(NumEltsExtracted);
25159   SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
25160       DemandedSubvectors;
25161 
25162   // Try to decode the wide mask into a narrow mask from at most two subvectors.
25163   for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
25164                                                   NumEltsExtracted)) {
25165     assert((M >= -1) && (M < (2 * WideNumElts)) &&
25166            "Out-of-bounds shuffle mask?");
25167 
25168     if (M < 0) {
25169       // Does not depend on operands, does not require adjustment.
25170       NewMask.emplace_back(M);
25171       continue;
25172     }
25173 
25174     // From which operand of the shuffle does this shuffle mask element pick?
25175     int WideShufOpIdx = M / WideNumElts;
25176     // Which element of that operand is picked?
25177     int OpEltIdx = M % WideNumElts;
25178 
25179     assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
25180            "Shuffle mask vector decomposition failure.");
25181 
25182     // And which NumEltsExtracted-sized subvector of that operand is that?
25183     int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
25184     // And which element within that subvector of that operand is that?
25185     int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
25186 
25187     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
25188            "Shuffle mask subvector decomposition failure.");
25189 
25190     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
25191             WideShufOpIdx * WideNumElts) == M &&
25192            "Shuffle mask full decomposition failure.");
25193 
25194     SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
25195 
25196     if (Op.isUndef()) {
25197       // Picking from an undef operand. Let's adjust mask instead.
25198       NewMask.emplace_back(-1);
25199       continue;
25200     }
25201 
25202     const std::pair<SDValue, int> DemandedSubvector =
25203         std::make_pair(Op, OpSubvecIdx);
25204 
25205     if (DemandedSubvectors.insert(DemandedSubvector)) {
25206       if (DemandedSubvectors.size() > 2)
25207         return SDValue(); // We can't handle more than two subvectors.
25208       // How many elements into the WideVT does this subvector start?
25209       int Index = NumEltsExtracted * OpSubvecIdx;
25210       // Bail out if the extraction isn't going to be cheap.
25211       if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
25212         return SDValue();
25213     }
25214 
25215     // Ok, but from which operand of the new shuffle will this element pick?
25216     int NewOpIdx =
25217         getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
25218     assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
25219 
25220     int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
25221     NewMask.emplace_back(AdjM);
25222   }
25223   assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
25224   assert(DemandedSubvectors.size() <= 2 &&
25225          "Should have ended up demanding at most two subvectors.");
25226 
25227   // Did we discover that the shuffle does not actually depend on operands?
25228   if (DemandedSubvectors.empty())
25229     return DAG.getUNDEF(NarrowVT);
25230 
25231   // Profitability check: only deal with extractions from the first subvector
25232   // unless the new mask is a no-undef identity mask.
25233   if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
25234       any_of(NewMask, [](int M) { return M < 0; }))
25235     for (auto &DemandedSubvector : DemandedSubvectors)
25236       if (DemandedSubvector.second != 0)
25237         return SDValue();
25238 
25239   // We still perform the exact same EXTRACT_SUBVECTOR, just on different
25240   // operand[s]/index[es], so there is no point in checking its legality.
25241 
25242   // Do not turn a legal shuffle into an illegal one.
25243   if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
25244       !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
25245     return SDValue();
25246 
25247   SDLoc DL(N);
25248 
25249   SmallVector<SDValue, 2> NewOps;
25250   for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
25251            &DemandedSubvector : DemandedSubvectors) {
25252     // How many elements into the WideVT does this subvector start?
25253     int Index = NumEltsExtracted * DemandedSubvector.second;
25254     SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
25255     NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
25256                                     DemandedSubvector.first, IndexC));
25257   }
25258   assert((NewOps.size() == 1 || NewOps.size() == 2) &&
25259          "Should end up with either one or two ops");
25260 
25261   // If we ended up with only one operand, pad with an undef.
25262   if (NewOps.size() == 1)
25263     NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
25264 
25265   return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
25266 }
25267 
25268 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
25269   EVT NVT = N->getValueType(0);
25270   SDValue V = N->getOperand(0);
25271   uint64_t ExtIdx = N->getConstantOperandVal(1);
25272   SDLoc DL(N);
25273 
25274   // Extract from UNDEF is UNDEF.
25275   if (V.isUndef())
25276     return DAG.getUNDEF(NVT);
25277 
25278   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
25279     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
25280       return NarrowLoad;
25281 
25282   // Combine an extract of an extract into a single extract_subvector.
25283   // ext (ext X, C), 0 --> ext X, C
25284   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
25285     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
25286                                     V.getConstantOperandVal(1)) &&
25287         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
25288       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
25289                          V.getOperand(1));
25290     }
25291   }
25292 
25293   // ty1 extract_subvector (ty2 splat(V)) -> ty1 splat(V)
25294   if (V.getOpcode() == ISD::SPLAT_VECTOR)
25295     if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
25296       if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
25297         return DAG.getSplatVector(NVT, DL, V.getOperand(0));
25298 
25299   // extract_subvector(insert_subvector(x,y,c1),c2)
25300   //  --> extract_subvector(y,c2-c1)
25301   // iff we're just extracting from the inserted subvector.
25302   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
25303     SDValue InsSub = V.getOperand(1);
25304     EVT InsSubVT = InsSub.getValueType();
25305     unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
25306     unsigned InsIdx = V.getConstantOperandVal(2);
25307     unsigned NumSubElts = NVT.getVectorMinNumElements();
25308     if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
25309         TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
25310         InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
25311         V.getValueType().isFixedLengthVector())
25312       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
25313                          DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
25314   }
25315 
25316   // Try to move vector bitcast after extract_subv by scaling extraction index:
25317   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
25318   if (V.getOpcode() == ISD::BITCAST &&
25319       V.getOperand(0).getValueType().isVector() &&
25320       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
25321     SDValue SrcOp = V.getOperand(0);
25322     EVT SrcVT = SrcOp.getValueType();
25323     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
25324     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
25325     if ((SrcNumElts % DestNumElts) == 0) {
25326       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
25327       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
25328       EVT NewExtVT =
25329           EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
25330       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
25331         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
25332         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
25333                                          V.getOperand(0), NewIndex);
25334         return DAG.getBitcast(NVT, NewExtract);
25335       }
25336     }
25337     if ((DestNumElts % SrcNumElts) == 0) {
25338       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
25339       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
25340         ElementCount NewExtEC =
25341             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
25342         EVT ScalarVT = SrcVT.getScalarType();
25343         if ((ExtIdx % DestSrcRatio) == 0) {
25344           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
25345           EVT NewExtVT =
25346               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
25347           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
25348             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
25349             SDValue NewExtract =
25350                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
25351                             V.getOperand(0), NewIndex);
25352             return DAG.getBitcast(NVT, NewExtract);
25353           }
25354           if (NewExtEC.isScalar() &&
25355               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
25356             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
25357             SDValue NewExtract =
25358                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
25359                             V.getOperand(0), NewIndex);
25360             return DAG.getBitcast(NVT, NewExtract);
25361           }
25362         }
25363       }
25364     }
25365   }
25366 
25367   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
25368     unsigned ExtNumElts = NVT.getVectorMinNumElements();
25369     EVT ConcatSrcVT = V.getOperand(0).getValueType();
25370     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
25371            "Concat and extract subvector do not change element type");
25372     assert((ExtIdx % ExtNumElts) == 0 &&
25373            "Extract index is not a multiple of the input vector length.");
25374 
25375     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
25376     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
25377 
25378     // If the concatenated source types match this extract, it's a direct
25379     // simplification:
25380     // extract_subvec (concat V1, V2, ...), i --> Vi
25381     if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
25382       return V.getOperand(ConcatOpIdx);
25383 
25384     // If the length of each concatenated source vector is a whole multiple
25385     // of this extract's length, then the extract lies entirely within one
25386     // concat operand and we can extract from it directly. Example:
25387     //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
25388     //   v2i8 extract_subvec (v8i8 Y), 6
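          // (With the illustrative numbers above: ConcatSrcNumElts = 8, so
          // ConcatOpIdx = 14 / 8 = 1 selects Y, and NewExtIdx = 14 - 1 * 8 = 6.)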
25389     if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
25390         ConcatSrcNumElts % ExtNumElts == 0) {
25391       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
25392       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
25393              "Trying to extract from >1 concat operand?");
25394       assert(NewExtIdx % ExtNumElts == 0 &&
25395              "Extract index is not a multiple of the input vector length.");
25396       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
25397       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
25398                          V.getOperand(ConcatOpIdx), NewIndexC);
25399     }
25400   }
25401 
25402   if (SDValue V =
25403           foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
25404     return V;
25405 
25406   V = peekThroughBitcasts(V);
25407 
25408   // If the input is a build vector, try to make a smaller build vector.
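        // A worked example with illustrative types and operands a..h, assuming
        // the narrow build_vector is acceptable at this legalization level:
        //   v2i32 extract_subvector (v8i32 build_vector a,b,c,d,e,f,g,h), 2
        //     --> v2i32 build_vector c, d
        // i.e. the extract covers exactly operands 2 and 3 of the source.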
25409   if (V.getOpcode() == ISD::BUILD_VECTOR) {
25410     EVT InVT = V.getValueType();
25411     unsigned ExtractSize = NVT.getSizeInBits();
25412     unsigned EltSize = InVT.getScalarSizeInBits();
25413     // Only do this if we won't split any elements.
25414     if (ExtractSize % EltSize == 0) {
25415       unsigned NumElems = ExtractSize / EltSize;
25416       EVT EltVT = InVT.getVectorElementType();
25417       EVT ExtractVT =
25418           NumElems == 1 ? EltVT
25419                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
25420       if ((Level < AfterLegalizeDAG ||
25421            (NumElems == 1 ||
25422             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
25423           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
25424         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
25425 
25426         if (NumElems == 1) {
25427           SDValue Src = V->getOperand(IdxVal);
25428           if (EltVT != Src.getValueType())
25429             Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
25430           return DAG.getBitcast(NVT, Src);
25431         }
25432 
25433         // Extract the pieces from the original build_vector.
25434         SDValue BuildVec =
25435             DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
25436         return DAG.getBitcast(NVT, BuildVec);
25437       }
25438     }
25439   }
25440 
25441   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
25442     // Handle only simple case where vector being inserted and vector
25443     // being extracted are of same size.
25444     EVT SmallVT = V.getOperand(1).getValueType();
25445     if (!NVT.bitsEq(SmallVT))
25446       return SDValue();
25447 
25448     // Combine:
25449     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
25450     // Into:
25451     //    indices are equal or bit offsets are equal => V1
25452     //    otherwise => (extract_subvec V1, ExtIdx)
25453     uint64_t InsIdx = V.getConstantOperandVal(2);
25454     if (InsIdx * SmallVT.getScalarSizeInBits() ==
25455         ExtIdx * NVT.getScalarSizeInBits()) {
25456       if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
25457         return SDValue();
25458 
25459       return DAG.getBitcast(NVT, V.getOperand(1));
25460     }
25461     return DAG.getNode(
25462         ISD::EXTRACT_SUBVECTOR, DL, NVT,
25463         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
25464         N->getOperand(1));
25465   }
25466 
25467   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
25468     return NarrowBOp;
25469 
25470   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25471     return SDValue(N, 0);
25472 
25473   return SDValue();
25474 }
25475 
25476 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
25477 /// followed by concatenation. Narrow vector ops may have better performance
25478 /// than wide ops, and this can unlock further narrowing of other vector ops.
25479 /// Targets can invert this transform later if it is not profitable.
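      /// For example, with illustrative v4i32 inputs X and Y:
      ///   shuffle (v8i32 concat X, undef), (v8i32 concat Y, undef),
      ///           <0,8,1,9,2,10,3,11>
      ///     --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <2,6,3,7>)
      /// Mask elements taken from operand 1 are offset down by the narrowed
      /// vector length (here 4).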
25480 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
25481                                          SelectionDAG &DAG) {
25482   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
25483   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
25484       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
25485       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
25486     return SDValue();
25487 
25488   // Split the wide shuffle mask into halves. Any mask element that is accessing
25489   // operand 1 is offset down to account for narrowing of the vectors.
25490   ArrayRef<int> Mask = Shuf->getMask();
25491   EVT VT = Shuf->getValueType(0);
25492   unsigned NumElts = VT.getVectorNumElements();
25493   unsigned HalfNumElts = NumElts / 2;
25494   SmallVector<int, 16> Mask0(HalfNumElts, -1);
25495   SmallVector<int, 16> Mask1(HalfNumElts, -1);
25496   for (unsigned i = 0; i != NumElts; ++i) {
25497     if (Mask[i] == -1)
25498       continue;
25499     // If we reference the upper (undef) subvector then the element is undef.
25500     if ((Mask[i] % NumElts) >= HalfNumElts)
25501       continue;
25502     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
25503     if (i < HalfNumElts)
25504       Mask0[i] = M;
25505     else
25506       Mask1[i - HalfNumElts] = M;
25507   }
25508 
25509   // Ask the target if this is a valid transform.
25510   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25511   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
25512                                 HalfNumElts);
25513   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
25514       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
25515     return SDValue();
25516 
25517   // shuffle (concat X, undef), (concat Y, undef), Mask -->
25518   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
25519   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
25520   SDLoc DL(Shuf);
25521   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
25522   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
25523   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
25524 }
25525 
25526 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
25527 // or turn a shuffle of a single concat into simpler shuffle then concat.
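      // A worked example with illustrative v2i32 inputs A, B, C and D:
      //   shuffle (v4i32 concat A, B), (v4i32 concat C, D), <2,3,4,5>
      //     --> v4i32 concat B, C
      // since each mask chunk copies one whole concat operand unchanged.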
25528 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
25529   EVT VT = N->getValueType(0);
25530   unsigned NumElts = VT.getVectorNumElements();
25531 
25532   SDValue N0 = N->getOperand(0);
25533   SDValue N1 = N->getOperand(1);
25534   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25535   ArrayRef<int> Mask = SVN->getMask();
25536 
25537   SmallVector<SDValue, 4> Ops;
25538   EVT ConcatVT = N0.getOperand(0).getValueType();
25539   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
25540   unsigned NumConcats = NumElts / NumElemsPerConcat;
25541 
25542   auto IsUndefMaskElt = [](int i) { return i == -1; };
25543 
25544   // Special case: shuffle(concat(A,B)) can be more efficiently represented
25545   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
25546   // half vector elements.
25547   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
25548       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
25549                    IsUndefMaskElt)) {
25550     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
25551                               N0.getOperand(1),
25552                               Mask.slice(0, NumElemsPerConcat));
25553     N1 = DAG.getUNDEF(ConcatVT);
25554     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
25555   }
25556 
25557   // Look at each subvector-sized chunk of the output. We're looking for
25558   // exact, subvector-sized copies from one of the concatenated vectors.
25559   for (unsigned I = 0; I != NumConcats; ++I) {
25560     unsigned Begin = I * NumElemsPerConcat;
25561     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
25562 
25563     // Make sure we're dealing with a copy.
25564     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
25565       Ops.push_back(DAG.getUNDEF(ConcatVT));
25566       continue;
25567     }
25568 
25569     int OpIdx = -1;
25570     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
25571       if (IsUndefMaskElt(SubMask[i]))
25572         continue;
25573       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
25574         return SDValue();
25575       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
25576       if (0 <= OpIdx && EltOpIdx != OpIdx)
25577         return SDValue();
25578       OpIdx = EltOpIdx;
25579     }
25580     assert(0 <= OpIdx && "Unknown concat_vectors op");
25581 
25582     if (OpIdx < (int)N0.getNumOperands())
25583       Ops.push_back(N0.getOperand(OpIdx));
25584     else
25585       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25586   }
25587 
25588   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25589 }
25590 
25591 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25592 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25593 //
25594 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25595 // a simplification in some sense, but it isn't appropriate in general: some
25596 // BUILD_VECTORs are substantially cheaper than others. The general case
25597 // of a BUILD_VECTOR requires inserting each element individually (or
25598 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
25599 // all constants is a single constant pool load.  A BUILD_VECTOR where each
25600 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
25601 // are undef lowers to a small number of element insertions.
25602 //
25603 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
25604 // We don't fold shuffles where one side is a non-zero constant, and we don't
25605 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25606 // non-constant operands. This seems to work out reasonably well in practice.
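      // A worked example with illustrative non-constant scalars a..d (and a
      // single-use build_vector):
      //   shuffle (build_vector a, b, c, d), undef, <0,u,3,2>
      //     --> build_vector a, undef, d, c
      // No non-constant operand is duplicated, so the heuristics accept it.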
25607 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
25608                                        SelectionDAG &DAG,
25609                                        const TargetLowering &TLI) {
25610   EVT VT = SVN->getValueType(0);
25611   unsigned NumElts = VT.getVectorNumElements();
25612   SDValue N0 = SVN->getOperand(0);
25613   SDValue N1 = SVN->getOperand(1);
25614 
25615   if (!N0->hasOneUse())
25616     return SDValue();
25617 
25618   // If only one of N0,N1 is constant, bail out if it is not all-zeros, as
25619   // discussed above.
25620   if (!N1.isUndef()) {
25621     if (!N1->hasOneUse())
25622       return SDValue();
25623 
25624     bool N0AnyConst = isAnyConstantBuildVector(N0);
25625     bool N1AnyConst = isAnyConstantBuildVector(N1);
25626     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
25627       return SDValue();
25628     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
25629       return SDValue();
25630   }
25631 
25632   // If both inputs are splats of the same value then we can safely merge this
25633   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25634   bool IsSplat = false;
25635   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25636   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25637   if (BV0 && BV1)
25638     if (SDValue Splat0 = BV0->getSplatValue())
25639       IsSplat = (Splat0 == BV1->getSplatValue());
25640 
25641   SmallVector<SDValue, 8> Ops;
25642   SmallSet<SDValue, 16> DuplicateOps;
25643   for (int M : SVN->getMask()) {
25644     SDValue Op = DAG.getUNDEF(VT.getScalarType());
25645     if (M >= 0) {
25646       int Idx = M < (int)NumElts ? M : M - NumElts;
25647       SDValue &S = (M < (int)NumElts ? N0 : N1);
25648       if (S.getOpcode() == ISD::BUILD_VECTOR) {
25649         Op = S.getOperand(Idx);
25650       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25651         SDValue Op0 = S.getOperand(0);
25652         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25653       } else {
25654         // Operand can't be combined - bail out.
25655         return SDValue();
25656       }
25657     }
25658 
25659     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25660     // generating a splat; semantically, this is fine, but it's likely to
25661     // generate low-quality code if the target can't reconstruct an appropriate
25662     // shuffle.
25663     if (!Op.isUndef() && !isIntOrFPConstant(Op))
25664       if (!IsSplat && !DuplicateOps.insert(Op).second)
25665         return SDValue();
25666 
25667     Ops.push_back(Op);
25668   }
25669 
25670   // BUILD_VECTOR requires all inputs to be of the same type, find the
25671   // maximum type and extend them all.
25672   EVT SVT = VT.getScalarType();
25673   if (SVT.isInteger())
25674     for (SDValue &Op : Ops)
25675       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25676   if (SVT != VT.getScalarType())
25677     for (SDValue &Op : Ops)
25678       Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25679                         : (TLI.isZExtFree(Op.getValueType(), SVT)
25680                                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25681                                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25682   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25683 }
25684 
25685 // Match shuffles that can be converted to *_vector_extend_in_reg.
25686 // This is often generated during legalization.
25687 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)).
25688 // Returns the EVT to which the extension should be performed.
25689 // NOTE: this assumes that the src is the first operand of the shuffle.
25690 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25691     unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25692     SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25693     bool LegalOperations) {
25694   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25695 
25696   // TODO Add support for big-endian when we have a test case.
25697   if (!VT.isInteger() || IsBigEndian)
25698     return std::nullopt;
25699 
25700   unsigned NumElts = VT.getVectorNumElements();
25701   unsigned EltSizeInBits = VT.getScalarSizeInBits();
25702 
25703   // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
25704   // power-of-2 extensions as they are the most likely.
25705   // FIXME: should try the Scale == NumElts case too.
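        // For example, for an illustrative VT of v8i16 this tries Scale = 2
        // (OutVT = v4i32) and Scale = 4 (OutVT = v2i64); Scale = 8 is not
        // tried, per the FIXME above.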
25706   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25707     // The vector width must be a multiple of Scale.
25708     if (NumElts % Scale != 0)
25709       continue;
25710 
25711     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25712     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25713 
25714     if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25715         (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25716       continue;
25717 
25718     if (Match(Scale))
25719       return OutVT;
25720   }
25721 
25722   return std::nullopt;
25723 }
25724 
25725 // Match shuffles that can be converted to any_vector_extend_in_reg.
25726 // This is often generated during legalization.
25727 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25728 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
25729                                                     SelectionDAG &DAG,
25730                                                     const TargetLowering &TLI,
25731                                                     bool LegalOperations) {
25732   EVT VT = SVN->getValueType(0);
25733   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25734 
25735   // TODO Add support for big-endian when we have a test case.
25736   if (!VT.isInteger() || IsBigEndian)
25737     return SDValue();
25738 
25739   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25740   auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25741                       Mask = SVN->getMask()](unsigned Scale) {
25742     for (unsigned i = 0; i != NumElts; ++i) {
25743       if (Mask[i] < 0)
25744         continue;
25745       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25746         continue;
25747       return false;
25748     }
25749     return true;
25750   };
25751 
25752   unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25753   SDValue N0 = SVN->getOperand(0);
25754   // Never create an illegal type. Only create unsupported operations if we
25755   // are pre-legalization.
25756   std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25757       Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25758   if (!OutVT)
25759     return SDValue();
25760   return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25761 }
25762 
25763 // Match shuffles that can be converted to zero_extend_vector_inreg.
25764 // This is often generated during legalization.
25765 // e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25766 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25767                                                      SelectionDAG &DAG,
25768                                                      const TargetLowering &TLI,
25769                                                      bool LegalOperations) {
25770   bool LegalTypes = true;
25771   EVT VT = SVN->getValueType(0);
25772   assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25773   unsigned NumElts = VT.getVectorNumElements();
25774   unsigned EltSizeInBits = VT.getScalarSizeInBits();
25775 
25776   // TODO: add support for big-endian when we have a test case.
25777   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25778   if (!VT.isInteger() || IsBigEndian)
25779     return SDValue();
25780 
25781   SmallVector<int, 16> Mask(SVN->getMask());
25782   auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25783     for (int &Indice : Mask) {
25784       if (Indice < 0)
25785         continue;
25786       int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25787       int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25788       Fn(Indice, OpIdx, OpEltIdx);
25789     }
25790   };
25791 
25792   // Which elements of which operand does this shuffle demand?
25793   std::array<APInt, 2> OpsDemandedElts;
25794   for (APInt &OpDemandedElts : OpsDemandedElts)
25795     OpDemandedElts = APInt::getZero(NumElts);
25796   ForEachDecomposedIndice(
25797       [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25798         OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25799       });
25800 
25801   // Element-wise(!): which of these demanded elements are known to be zero?
25802   std::array<APInt, 2> OpsKnownZeroElts;
25803   for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25804     std::get<2>(I) =
25805         DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25806 
25807   // Manifest zeroable element knowledge in the shuffle mask.
25808   // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
25809   //       this is a local invention, but it won't leak into the DAG.
25810   // FIXME: should we not manifest them, but just check when matching?
25811   bool HadZeroableElts = false;
25812   ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25813                               int &Indice, int OpIdx, int OpEltIdx) {
25814     if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25815       Indice = -2; // Zeroable element.
25816       HadZeroableElts = true;
25817     }
25818   });
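        // For example, with illustrative v4i32 operands where operand 1 is
        // known to be all-zeros, the mask <0,5,1,7> becomes <0,-2,1,-2>, which
        // the zero-extend matcher below recognizes with Scale == 2.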
25819 
25820   // Don't proceed unless we've refined at least one zeroable mask index.
25821   // If we didn't, then we are still trying to match the same shuffle mask
25822   // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25823   // and evidently failed. Proceeding will lead to endless combine loops.
25824   if (!HadZeroableElts)
25825     return SDValue();
25826 
25827   // The shuffle may be more fine-grained than we want. Widen elements first.
25828   // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25829   SmallVector<int, 16> ScaledMask;
25830   getShuffleMaskWithWidestElts(Mask, ScaledMask);
25831   assert(Mask.size() >= ScaledMask.size() &&
25832          Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25833   int Prescale = Mask.size() / ScaledMask.size();
25834 
25835   NumElts = ScaledMask.size();
25836   EltSizeInBits *= Prescale;
25837 
25838   EVT PrescaledVT = EVT::getVectorVT(
25839       *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25840       NumElts);
25841 
25842   if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25843     return SDValue();
25844 
25845   // For example,
25846   // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25847   // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25848   auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25849     assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25850            "Unexpected mask scaling factor.");
25851     ArrayRef<int> Mask = ScaledMask;
25852     for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25853          SrcElt != NumSrcElts; ++SrcElt) {
25854       // Analyze the shuffle mask in Scale-sized chunks.
25855       ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25856       assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25857       Mask = Mask.drop_front(MaskChunk.size());
25858       // The first index in this chunk must be SrcElt, but not zero!
25859       // FIXME: undef should be fine, but that would make the result more defined.
25860       if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25861         return false;
25862       // The rest of the indices in this chunk must be zeros.
25863       // FIXME: undef should be fine, but that would make the result more defined.
25864       if (!all_of(MaskChunk.drop_front(1),
25865                   [](int Indice) { return Indice == -2; }))
25866         return false;
25867     }
25868     assert(Mask.empty() && "Did not process the whole mask?");
25869     return true;
25870   };
25871 
25872   unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25873   for (bool Commuted : {false, true}) {
25874     SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25875     if (Commuted)
25876       ShuffleVectorSDNode::commuteMask(ScaledMask);
25877     std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25878         Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25879         LegalOperations);
25880     if (OutVT)
25881       return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25882                                             DAG.getBitcast(PrescaledVT, Op)));
25883   }
25884   return SDValue();
25885 }
25886 
25887 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25888 // each source element of a large type into the lowest elements of a smaller
25889 // destination type. This is often generated during legalization.
25890 // If the source node itself was a '*_extend_vector_inreg' node then we should
25891 // then be able to remove it.
25892 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25893                                         SelectionDAG &DAG) {
25894   EVT VT = SVN->getValueType(0);
25895   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25896 
25897   // TODO Add support for big-endian when we have a test case.
25898   if (!VT.isInteger() || IsBigEndian)
25899     return SDValue();
25900 
25901   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25902 
25903   unsigned Opcode = N0.getOpcode();
25904   if (!ISD::isExtVecInRegOpcode(Opcode))
25905     return SDValue();
25906 
25907   SDValue N00 = N0.getOperand(0);
25908   ArrayRef<int> Mask = SVN->getMask();
25909   unsigned NumElts = VT.getVectorNumElements();
25910   unsigned EltSizeInBits = VT.getScalarSizeInBits();
25911   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25912   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25913 
25914   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25915     return SDValue();
25916   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25917 
25918   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25919   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25920   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
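        // A worked example with illustrative types: if N0 is
        //   v4i32 any_extend_vector_inreg (v8i16 N00)
        // then ExtScale = 32 / 16 = 2, and the v8i16 shuffle<0,2,4,6,u,u,u,u>
        // of (bitcast N0) just recovers N00, so we return bitcast(N00).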
25921   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25922     for (unsigned i = 0; i != NumElts; ++i) {
25923       if (Mask[i] < 0)
25924         continue;
25925       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25926         continue;
25927       return false;
25928     }
25929     return true;
25930   };
25931 
25932   // At the moment we just handle the case where we've truncated back to the
25933   // same size as before the extension.
25934   // TODO: handle more extension/truncation cases as they arise.
25935   if (EltSizeInBits != ExtSrcSizeInBits)
25936     return SDValue();
25937 
25938   // We can remove *extend_vector_inreg only if the truncation happens at
25939   // the same scale as the extension.
25940   if (isTruncate(ExtScale))
25941     return DAG.getBitcast(VT, N00);
25942 
25943   return SDValue();
25944 }
25945 
25946 // Combine shuffles of splat-shuffles of the form:
25947 // shuffle (shuffle V, undef, splat-mask), undef, M
25948 // If splat-mask contains undef elements, we need to be careful about
25949 // introducing undefs in the folded mask which are not the result of composing
25950 // the masks of the shuffles.
25951 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25952                                         SelectionDAG &DAG) {
25953   EVT VT = Shuf->getValueType(0);
25954   unsigned NumElts = VT.getVectorNumElements();
25955 
25956   if (!Shuf->getOperand(1).isUndef())
25957     return SDValue();
25958 
25959   // See if this unary non-splat shuffle actually *is* a splat shuffle in
25960   // disguise, with all demanded elements being identical.
25961   // FIXME: this can be done per-operand.
25962   if (!Shuf->isSplat()) {
25963     APInt DemandedElts(NumElts, 0);
25964     for (int Idx : Shuf->getMask()) {
25965       if (Idx < 0)
25966         continue; // Ignore sentinel indices.
25967       assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
25968       DemandedElts.setBit(Idx);
25969     }
25970     assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25971     APInt UndefElts;
25972     if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25973       // Even if all demanded elements are splat, some of them could be undef.
25974       // Which lowest demanded element is *not* known-undef?
25975       std::optional<unsigned> MinNonUndefIdx;
25976       for (int Idx : Shuf->getMask()) {
25977         if (Idx < 0 || UndefElts[Idx])
25978           continue; // Ignore sentinel indices, and undef elements.
25979         MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25980       }
25981       if (!MinNonUndefIdx)
25982         return DAG.getUNDEF(VT); // All undef - result is undef.
25983       assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25984       SmallVector<int, 8> SplatMask(Shuf->getMask());
25985       for (int &Idx : SplatMask) {
25986         if (Idx < 0)
25987           continue; // Passthrough sentinel indices.
25988         // Otherwise, just pick the lowest demanded non-undef element.
25989         // Or sentinel undef, if we know we'd pick a known-undef element.
25990         Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25991       }
25992       assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25993       return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25994                                   Shuf->getOperand(1), SplatMask);
25995     }
25996   }
25997 
25998   // If the inner operand is a known splat with no undefs, return it directly.
25999   // TODO: Create DemandedElts mask from Shuf's mask.
26000   // TODO: Allow undef elements and merge with the shuffle code below.
26001   if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26002     return Shuf->getOperand(0);
26003 
26004   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26005   if (!Splat || !Splat->isSplat())
26006     return SDValue();
26007 
26008   ArrayRef<int> ShufMask = Shuf->getMask();
26009   ArrayRef<int> SplatMask = Splat->getMask();
26010   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
26011 
26012   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26013   // every undef mask element in the splat-shuffle has a corresponding undef
26014   // element in the user-shuffle's mask or if the composition of mask elements
26015   // would result in undef.
26016   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
26017   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26018   //   In this case it is not legal to simplify to the splat-shuffle because we
26019   //   may be exposing to the users of the shuffle an undef element at index 1
26020   //   which was not there before the combine.
26021   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26022   //   In this case the composition of masks yields SplatMask, so it's ok to
26023   //   simplify to the splat-shuffle.
26024   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26025   //   In this case the composed mask includes all undef elements of SplatMask
26026   //   and in addition sets element zero to undef. It is safe to simplify to
26027   //   the splat-shuffle.
26028   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
26029                                        ArrayRef<int> SplatMask) {
26030     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
26031       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26032           SplatMask[UserMask[i]] != -1)
26033         return false;
26034     return true;
26035   };
26036   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
26037     return Shuf->getOperand(0);
26038 
26039   // Create a new shuffle with a mask that is composed of the two shuffles'
26040   // masks.
26041   SmallVector<int, 32> NewMask;
26042   for (int Idx : ShufMask)
26043     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26044 
26045   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26046                               Splat->getOperand(0), Splat->getOperand(1),
26047                               NewMask);
26048 }
26049 
26050 // Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26051 // the mask can be treated as a larger type.
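      // A worked example with illustrative types:
      //   v8i16 shuffle (bitcast v4i32 X), (bitcast v4i32 Y), <2,3,8,9,u,u,6,7>
      // widens by Factor = 8 / 4 = 2 into
      //   bitcast (v4i32 shuffle X, Y, <1,4,u,3>)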
26052 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26053                                        SelectionDAG &DAG,
26054                                        const TargetLowering &TLI,
26055                                        bool LegalOperations) {
26056   SDValue Op0 = SVN->getOperand(0);
26057   SDValue Op1 = SVN->getOperand(1);
26058   EVT VT = SVN->getValueType(0);
26059   if (Op0.getOpcode() != ISD::BITCAST)
26060     return SDValue();
26061   EVT InVT = Op0.getOperand(0).getValueType();
26062   if (!InVT.isVector() ||
26063       (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26064                           Op1.getOperand(0).getValueType() != InVT)))
26065     return SDValue();
26066   if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26067       (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26068     return SDValue();
26069 
26070   int VTLanes = VT.getVectorNumElements();
26071   int InLanes = InVT.getVectorNumElements();
26072   if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
26073       (LegalOperations &&
26074        !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
26075     return SDValue();
26076   int Factor = VTLanes / InLanes;
26077 
26078   // Check that each group of lanes in the mask is either undef or makes a
26079   // valid mask element for the wider lane type.
26080   ArrayRef<int> Mask = SVN->getMask();
26081   SmallVector<int> NewMask;
26082   if (!widenShuffleMaskElts(Factor, Mask, NewMask))
26083     return SDValue();
26084 
26085   if (!TLI.isShuffleMaskLegal(NewMask, InVT))
26086     return SDValue();
26087 
26088   // Create the new shuffle with the new mask and bitcast it back to the
26089   // original type.
26090   SDLoc DL(SVN);
26091   Op0 = Op0.getOperand(0);
26092   Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
26093   SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
26094   return DAG.getBitcast(VT, NewShuf);
26095 }
26096 
26097 /// Combine shuffle of shuffle of the form:
26098 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
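      /// For example, with illustrative masks InnerMask = <1,1,u,1> and
      /// OuterMask = <0,3,u,0>: every defined lane traces back to inner
      /// element 1, so this folds to a single splat shuffle with mask
      /// <1,1,u,1>.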
26099 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
26100                                      SelectionDAG &DAG) {
26101   if (!OuterShuf->getOperand(1).isUndef())
26102     return SDValue();
26103   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26104   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26105     return SDValue();
26106 
26107   ArrayRef<int> OuterMask = OuterShuf->getMask();
26108   ArrayRef<int> InnerMask = InnerShuf->getMask();
26109   unsigned NumElts = OuterMask.size();
26110   assert(NumElts == InnerMask.size() && "Mask length mismatch");
26111   SmallVector<int, 32> CombinedMask(NumElts, -1);
26112   int SplatIndex = -1;
26113   for (unsigned i = 0; i != NumElts; ++i) {
26114     // Undef lanes remain undef.
26115     int OuterMaskElt = OuterMask[i];
26116     if (OuterMaskElt == -1)
26117       continue;
26118 
26119     // Peek through the shuffle masks to get the underlying source element.
26120     int InnerMaskElt = InnerMask[OuterMaskElt];
26121     if (InnerMaskElt == -1)
26122       continue;
26123 
26124     // Initialize the splatted element.
26125     if (SplatIndex == -1)
26126       SplatIndex = InnerMaskElt;
26127 
26128     // Non-matching index - this is not a splat.
26129     if (SplatIndex != InnerMaskElt)
26130       return SDValue();
26131 
26132     CombinedMask[i] = InnerMaskElt;
26133   }
26134   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26135           getSplatIndex(CombinedMask) != -1) &&
26136          "Expected a splat mask");
26137 
26138   // TODO: The transform may be a win even if the mask is not legal.
26139   EVT VT = OuterShuf->getValueType(0);
26140   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26141   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
26142     return SDValue();
26143 
26144   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26145                               InnerShuf->getOperand(1), CombinedMask);
26146 }
26147 
26148 /// If the shuffle mask is taking exactly one element from the first vector
26149 /// operand and passing through all other elements from the second vector
26150 /// operand, return the index of the mask element that is choosing an element
26151 /// from the first operand. Otherwise, return -1.
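      /// For example, Mask = <4,5,0,7> (size 4) takes only element 0 of
      /// operand 0, placing it in lane 2, and passes lanes 0, 1 and 3 through
      /// from operand 1, so this returns 2.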
26152 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
26153   int MaskSize = Mask.size();
26154   int EltFromOp0 = -1;
26155   // TODO: This does not match if there are undef elements in the shuffle mask.
26156   // Should we ignore undefs in the shuffle mask instead? The trade-off is
26157   // removing an instruction (a shuffle), but losing the knowledge that some
26158   // vector lanes are not needed.
26159   for (int i = 0; i != MaskSize; ++i) {
26160     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
26161       // We're looking for a shuffle of exactly one element from operand 0.
26162       if (EltFromOp0 != -1)
26163         return -1;
26164       EltFromOp0 = i;
26165     } else if (Mask[i] != i + MaskSize) {
26166       // Nothing from operand 1 can change lanes.
26167       return -1;
26168     }
26169   }
26170   return EltFromOp0;
26171 }
26172 
26173 /// If a shuffle inserts exactly one element from a source vector operand into
26174 /// another vector operand and we can access the specified element as a scalar,
26175 /// then we can eliminate the shuffle.
26176 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
26177                                       SelectionDAG &DAG) {
26178   // First, check if we are taking one element of a vector and shuffling that
26179   // element into another vector.
26180   ArrayRef<int> Mask = Shuf->getMask();
26181   SmallVector<int, 16> CommutedMask(Mask);
26182   SDValue Op0 = Shuf->getOperand(0);
26183   SDValue Op1 = Shuf->getOperand(1);
26184   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
26185   if (ShufOp0Index == -1) {
26186     // Commute mask and check again.
26187     ShuffleVectorSDNode::commuteMask(CommutedMask);
26188     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
26189     if (ShufOp0Index == -1)
26190       return SDValue();
26191     // Commute operands to match the commuted shuffle mask.
26192     std::swap(Op0, Op1);
26193     Mask = CommutedMask;
26194   }
26195 
26196   // The shuffle inserts exactly one element from operand 0 into operand 1.
26197   // Now see if we can access that element as a scalar via a real insert element
26198   // instruction.
26199   // TODO: We can try harder to locate the element as a scalar. Examples: it
26200   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
26201   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
26202          "Shuffle mask value must be from operand 0");
26203 
26204   SDValue Elt;
26205   if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
26206                                 m_SpecificInt(Mask[ShufOp0Index])))) {
26207     // There's an existing insertelement with constant insertion index, so we
26208     // don't need to check the legality/profitability of a replacement operation
26209     // that differs at most in the constant value. The target should be able to
26210     // lower any of those in a similar way. If not, legalization will expand
26211     // this to a scalar-to-vector plus shuffle.
26212     //
26213     // Note that the shuffle may move the scalar from the position that the
26214     // insert element used. Therefore, our new insert element occurs at the
26215     // shuffle's mask index value, not the insert's index value.
26216     //
26217     // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
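          // E.g. with the illustrative mask <4,5,0,7> (so ShufOp0Index == 2
          // and Mask[2] == 0):
          //   shuffle (insertelt v1, x, 0), v2, <4,5,0,7>
          //     --> insertelt v2, x, 2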
26218     SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
26219     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
26220                        Op1, Elt, NewInsIndex);
26221   }
26222 
26223   return SDValue();
26224 }
26225 
26226 /// If we have a unary shuffle of a shuffle, see if it can be folded away
26227 /// completely. This has the potential to lose undef knowledge because the first
26228 /// shuffle may not have an undef mask element where the second one does. So
26229 /// only call this after doing simplifications based on demanded elements.
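      /// For example, with illustrative masks, given Mask0 = <0,0,2,2>:
      ///   shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,0,3,2>
      ///     --> shuf0 X, Y, <0,0,2,2>
      /// because Mask0[Mask[i]] == Mask0[i] holds in every lane.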
26230 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
26231   // shuf (shuf0 X, Y, Mask0), undef, Mask
26232   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26233   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
26234     return SDValue();
26235 
26236   ArrayRef<int> Mask = Shuf->getMask();
26237   ArrayRef<int> Mask0 = Shuf0->getMask();
26238   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
26239     // Ignore undef elements.
26240     if (Mask[i] == -1)
26241       continue;
26242     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
26243 
26244     // Is the element of the shuffle operand chosen by this shuffle the same as
26245     // the element chosen by the shuffle operand itself?
26246     if (Mask0[Mask[i]] != Mask0[i])
26247       return SDValue();
26248   }
26249   // Every element of this shuffle is identical to the result of the previous
26250   // shuffle, so we can replace this value.
26251   return Shuf->getOperand(0);
26252 }
26253 
26254 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
26255   EVT VT = N->getValueType(0);
26256   unsigned NumElts = VT.getVectorNumElements();
26257 
26258   SDValue N0 = N->getOperand(0);
26259   SDValue N1 = N->getOperand(1);
26260 
26261   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
26262 
26263   // Canonicalize shuffle undef, undef -> undef
26264   if (N0.isUndef() && N1.isUndef())
26265     return DAG.getUNDEF(VT);
26266 
26267   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26268 
26269   // Canonicalize shuffle v, v -> v, undef
26270   if (N0 == N1)
26271     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
26272                                 createUnaryMask(SVN->getMask(), NumElts));
26273 
26274   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
26275   if (N0.isUndef())
26276     return DAG.getCommutedVectorShuffle(*SVN);
26277 
26278   // Remove references to rhs if it is undef
26279   if (N1.isUndef()) {
26280     bool Changed = false;
26281     SmallVector<int, 8> NewMask;
26282     for (unsigned i = 0; i != NumElts; ++i) {
26283       int Idx = SVN->getMaskElt(i);
26284       if (Idx >= (int)NumElts) {
26285         Idx = -1;
26286         Changed = true;
26287       }
26288       NewMask.push_back(Idx);
26289     }
26290     if (Changed)
26291       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
26292   }
26293 
26294   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
26295     return InsElt;
26296 
26297   // A shuffle of a single vector that is a splatted value can always be folded.
26298   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
26299     return V;
26300 
26301   if (SDValue V = formSplatFromShuffles(SVN, DAG))
26302     return V;
26303 
26304   // If it is a splat, check if the argument vector is another splat or a
26305   // build_vector.
26306   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
26307     int SplatIndex = SVN->getSplatIndex();
26308     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
26309         TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
26310       // splat (vector_bo L, R), Index -->
26311       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
26312       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
26313       SDLoc DL(N);
26314       EVT EltVT = VT.getScalarType();
26315       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
26316       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
26317       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
26318       SDValue NewBO =
26319           DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
26320       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
26321       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
26322       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
26323     }
26324 
26325     // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
26326     // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
26327     if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
26328         N0.hasOneUse()) {
26329       if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
26330         return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
26331 
26332       if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
26333         if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
26334           if (Idx->getAPIntValue() == SplatIndex)
26335             return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
26336 
26337       // Look through a bitcast, if little-endian and splatting lane 0, to a
26338       // scalar_to_vector or a build_vector.
26339       if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
26340           SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
26341           (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
26342            N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
26343         EVT N00VT = N0.getOperand(0).getValueType();
26344         if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
26345             VT.isInteger() && N00VT.isInteger()) {
26346           EVT InVT =
26347               TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
26348           SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
26349                                           SDLoc(N), InVT);
26350           return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
26351         }
26352       }
26353     }
26354 
26355     // If this is a bit convert that changes the element type of the vector but
26356     // not the number of vector elements, look through it.  Be careful not to
26357     // look though conversions that change things like v4f32 to v2f64.
26358     SDNode *V = N0.getNode();
26359     if (V->getOpcode() == ISD::BITCAST) {
26360       SDValue ConvInput = V->getOperand(0);
26361       if (ConvInput.getValueType().isVector() &&
26362           ConvInput.getValueType().getVectorNumElements() == NumElts)
26363         V = ConvInput.getNode();
26364     }
26365 
26366     if (V->getOpcode() == ISD::BUILD_VECTOR) {
26367       assert(V->getNumOperands() == NumElts &&
26368              "BUILD_VECTOR has wrong number of operands");
26369       SDValue Base;
26370       bool AllSame = true;
26371       for (unsigned i = 0; i != NumElts; ++i) {
26372         if (!V->getOperand(i).isUndef()) {
26373           Base = V->getOperand(i);
26374           break;
26375         }
26376       }
26377       // Splat of <u, u, u, u>, return <u, u, u, u>
26378       if (!Base.getNode())
26379         return N0;
26380       for (unsigned i = 0; i != NumElts; ++i) {
26381         if (V->getOperand(i) != Base) {
26382           AllSame = false;
26383           break;
26384         }
26385       }
26386       // Splat of <x, x, x, x>, return <x, x, x, x>
26387       if (AllSame)
26388         return N0;
26389 
26390       // Canonicalize any other splat as a build_vector, but avoid defining any
26391       // undefined elements in the mask.
26392       SDValue Splatted = V->getOperand(SplatIndex);
26393       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
26394       EVT EltVT = Splatted.getValueType();
26395 
26396       for (unsigned i = 0; i != NumElts; ++i) {
26397         if (SVN->getMaskElt(i) < 0)
26398           Ops[i] = DAG.getUNDEF(EltVT);
26399       }
26400 
26401       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
26402 
26403       // We may have jumped through bitcasts, so the type of the
26404       // BUILD_VECTOR may not match the type of the shuffle.
26405       if (V->getValueType(0) != VT)
26406         NewBV = DAG.getBitcast(VT, NewBV);
26407       return NewBV;
26408     }
26409   }
26410 
26411   // Simplify source operands based on shuffle mask.
26412   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26413     return SDValue(N, 0);
26414 
26415   // This is intentionally placed after demanded-elements simplification, as
26416   // this fold may lose knowledge of undef elements created by this shuffle.
26417   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
26418     return ShufOp;
26419 
26420   // Match shuffles that can be converted to any_vector_extend_in_reg.
26421   if (SDValue V =
26422           combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
26423     return V;
26424 
26425   // Combine "truncate_vector_in_reg" style shuffles.
26426   if (SDValue V = combineTruncationShuffle(SVN, DAG))
26427     return V;
26428 
26429   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
26430       Level < AfterLegalizeVectorOps &&
26431       (N1.isUndef() ||
26432       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
26433        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
26434     if (SDValue V = partitionShuffleOfConcats(N, DAG))
26435       return V;
26436   }
26437 
26438   // A shuffle of a concat of the same narrow vector can be reduced to use
26439   // only low-half elements of a concat with undef:
26440   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
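        // E.g. for an illustrative v4i32 shuffle with X : v2i32, mask <3,1,u,2>
        // becomes <1,1,u,0>, since elements 2 and 3 alias elements 0 and 1 of
        // the duplicated X.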
26441   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
26442       N0.getNumOperands() == 2 &&
26443       N0.getOperand(0) == N0.getOperand(1)) {
26444     int HalfNumElts = (int)NumElts / 2;
26445     SmallVector<int, 8> NewMask;
26446     for (unsigned i = 0; i != NumElts; ++i) {
26447       int Idx = SVN->getMaskElt(i);
26448       if (Idx >= HalfNumElts) {
26449         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
26450         Idx -= HalfNumElts;
26451       }
26452       NewMask.push_back(Idx);
26453     }
26454     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
26455       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
26456       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
26457                                    N0.getOperand(0), UndefVec);
26458       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
26459     }
26460   }
26461 
26462   // See if we can replace a shuffle with an insert_subvector.
26463   // e.g. v2i32 into v8i32:
26464   // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
26465   // --> insert_subvector(lhs,rhs1,4).
26466   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
26467       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
26468     auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
26469       // Ensure RHS subvectors are legal.
26470       assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
26471       EVT SubVT = RHS.getOperand(0).getValueType();
26472       int NumSubVecs = RHS.getNumOperands();
26473       int NumSubElts = SubVT.getVectorNumElements();
26474       assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
26475       if (!TLI.isTypeLegal(SubVT))
26476         return SDValue();
26477 
26478       // Don't bother if we have a unary shuffle (matches undef + LHS elts).
26479       if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
26480         return SDValue();
26481 
26482       // Search [NumSubElts] spans for RHS sequence.
26483       // TODO: Can we avoid nested loops to increase performance?
26484       SmallVector<int> InsertionMask(NumElts);
26485       for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
26486         for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
26487           // Reset mask to identity.
26488           std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
26489 
26490           // Add subvector insertion.
26491           std::iota(InsertionMask.begin() + SubIdx,
26492                     InsertionMask.begin() + SubIdx + NumSubElts,
26493                     NumElts + (SubVec * NumSubElts));
26494 
26495           // See if the shuffle mask matches the reference insertion mask.
26496           bool MatchingShuffle = true;
26497           for (int i = 0; i != (int)NumElts; ++i) {
26498             int ExpectIdx = InsertionMask[i];
26499             int ActualIdx = Mask[i];
26500             if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
26501               MatchingShuffle = false;
26502               break;
26503             }
26504           }
26505 
26506           if (MatchingShuffle)
26507             return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
26508                                RHS.getOperand(SubVec),
26509                                DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
26510         }
26511       }
26512       return SDValue();
26513     };
26514     ArrayRef<int> Mask = SVN->getMask();
26515     if (N1.getOpcode() == ISD::CONCAT_VECTORS)
26516       if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
26517         return InsertN1;
26518     if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
26519       SmallVector<int> CommuteMask(Mask);
26520       ShuffleVectorSDNode::commuteMask(CommuteMask);
26521       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
26522         return InsertN0;
26523     }
26524   }
26525 
26526   // If we're not performing a select/blend shuffle, see if we can convert the
26527   // shuffle into an AND node, with all the out-of-lane elements known zero.
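        // E.g. for an illustrative v4i32 shuffle of N0, N1 with mask <0,6,u,3>:
        // lane 1 takes element 2 of N1, so if that element is known zero, the
        // whole shuffle is equivalent to: and (bitcast N0), <-1,0,u,-1>.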
26528   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26529     bool IsInLaneMask = true;
26530     ArrayRef<int> Mask = SVN->getMask();
26531     SmallVector<int, 16> ClearMask(NumElts, -1);
26532     APInt DemandedLHS = APInt::getZero(NumElts);
26533     APInt DemandedRHS = APInt::getZero(NumElts);
26534     for (int I = 0; I != (int)NumElts; ++I) {
26535       int M = Mask[I];
26536       if (M < 0)
26537         continue;
26538       ClearMask[I] = M == I ? I : (I + NumElts);
26539       IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
26540       if (M != I) {
26541         APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
26542         Demanded.setBit(M % NumElts);
26543       }
26544     }
26545     // TODO: Should we try to mask with N1 as well?
26546     if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
26547         (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
26548         (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
26549       SDLoc DL(N);
26550       EVT IntVT = VT.changeVectorElementTypeToInteger();
26551       EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
26552       // Transform the type to a legal type so that the buildvector constant
26553       // elements are not illegal. Make sure the result is no smaller than the
26554       // original type, in case the value is split in two (e.g. i64->i32).
26555       if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
26556         IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
26557       if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
26558         SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
26559         SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
26560         SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
26561         for (int I = 0; I != (int)NumElts; ++I)
26562           if (0 <= Mask[I])
26563             AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
26564 
26565         // See if a clear mask is legal instead of going via
26566         // XformToShuffleWithZero which loses UNDEF mask elements.
26567         if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
26568           return DAG.getBitcast(
26569               VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
26570                                       DAG.getConstant(0, DL, IntVT), ClearMask));
26571 
26572         if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
26573           return DAG.getBitcast(
26574               VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
26575                               DAG.getBuildVector(IntVT, DL, AndMask)));
26576       }
26577     }
26578   }
26579 
26580   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26581   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26582   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
26583     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
26584       return Res;
26585 
26586   // If this shuffle only has a single input that is a bitcasted shuffle,
26587   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
26588   // back to their original types.
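        // For example (illustrative values): a v4i32 outer mask {0,1,-1,-1} over a
        // bitcast v2i64 inner shuffle with mask {1,3} scales the inner mask by
        // InnerScale = 2 to {2,3,6,7} and merges to the v4i32 mask {2,3,-1,-1}.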
26589   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
26590       N1.isUndef() && Level < AfterLegalizeVectorOps &&
26591       TLI.isTypeLegal(VT)) {
26592 
26593     SDValue BC0 = peekThroughOneUseBitcasts(N0);
26594     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
26595       EVT SVT = VT.getScalarType();
26596       EVT InnerVT = BC0->getValueType(0);
26597       EVT InnerSVT = InnerVT.getScalarType();
26598 
26599       // Determine which shuffle works with the smaller scalar type.
26600       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
26601       EVT ScaleSVT = ScaleVT.getScalarType();
26602 
26603       if (TLI.isTypeLegal(ScaleVT) &&
26604           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
26605           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
26606         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26607         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26608 
26609         // Scale the shuffle masks to the smaller scalar type.
26610         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
26611         SmallVector<int, 8> InnerMask;
26612         SmallVector<int, 8> OuterMask;
26613         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26614         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26615 
26616         // Merge the shuffle masks.
26617         SmallVector<int, 8> NewMask;
26618         for (int M : OuterMask)
26619           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26620 
26621         // Test for shuffle mask legality over both commutations.
26622         SDValue SV0 = BC0->getOperand(0);
26623         SDValue SV1 = BC0->getOperand(1);
26624         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26625         if (!LegalMask) {
26626           std::swap(SV0, SV1);
26627           ShuffleVectorSDNode::commuteMask(NewMask);
26628           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26629         }
26630 
26631         if (LegalMask) {
26632           SV0 = DAG.getBitcast(ScaleVT, SV0);
26633           SV1 = DAG.getBitcast(ScaleVT, SV1);
26634           return DAG.getBitcast(
26635               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
26636         }
26637       }
26638     }
26639   }
26640 
26641   // Match shuffles of bitcasts, so long as the mask can be treated as the
26642   // larger type.
26643   if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26644     return V;
26645 
26646   // Compute the combined shuffle mask for a shuffle with SV0 as the first
26647   // operand, and SV1 as the second operand.
26648   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26649   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
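        // For example (illustrative values): SVN = shuffle(OtherSVN, undef,
        // {0,0,2,2}) with OtherSVN = shuffle(A, B, {0,4,1,5}) composes to
        // SV0 = A, SV1 left unset (undef) and Mask = {0,0,1,1}.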
26650   auto MergeInnerShuffle =
26651       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26652                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
26653                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26654                      SmallVectorImpl<int> &Mask) -> bool {
26655     // Don't try to fold splats; they're likely to simplify somehow, or they
26656     // might be free.
26657     if (OtherSVN->isSplat())
26658       return false;
26659 
26660     SV0 = SV1 = SDValue();
26661     Mask.clear();
26662 
26663     for (unsigned i = 0; i != NumElts; ++i) {
26664       int Idx = SVN->getMaskElt(i);
26665       if (Idx < 0) {
26666         // Propagate Undef.
26667         Mask.push_back(Idx);
26668         continue;
26669       }
26670 
26671       if (Commute)
26672         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26673 
26674       SDValue CurrentVec;
26675       if (Idx < (int)NumElts) {
26676         // This shuffle index refers to the inner shuffle N0. Lookup the inner
26677         // shuffle mask to identify which vector is actually referenced.
26678         Idx = OtherSVN->getMaskElt(Idx);
26679         if (Idx < 0) {
26680           // Propagate Undef.
26681           Mask.push_back(Idx);
26682           continue;
26683         }
26684         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26685                                           : OtherSVN->getOperand(1);
26686       } else {
26687         // This shuffle index references an element within N1.
26688         CurrentVec = N1;
26689       }
26690 
26691       // Simple case where 'CurrentVec' is UNDEF.
26692       if (CurrentVec.isUndef()) {
26693         Mask.push_back(-1);
26694         continue;
26695       }
26696 
26697       // Canonicalize the shuffle index. We don't know yet if CurrentVec
26698       // will be the first or second operand of the combined shuffle.
26699       Idx = Idx % NumElts;
26700       if (!SV0.getNode() || SV0 == CurrentVec) {
26701         // Ok. CurrentVec is the left hand side.
26702         // Update the mask accordingly.
26703         SV0 = CurrentVec;
26704         Mask.push_back(Idx);
26705         continue;
26706       }
26707       if (!SV1.getNode() || SV1 == CurrentVec) {
26708         // Ok. CurrentVec is the right hand side.
26709         // Update the mask accordingly.
26710         SV1 = CurrentVec;
26711         Mask.push_back(Idx + NumElts);
26712         continue;
26713       }
26714 
26715       // Last chance - see if the vector is another shuffle and if it
26716       // uses one of the existing candidate shuffle ops.
26717       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26718         int InnerIdx = CurrentSVN->getMaskElt(Idx);
26719         if (InnerIdx < 0) {
26720           Mask.push_back(-1);
26721           continue;
26722         }
26723         SDValue InnerVec = (InnerIdx < (int)NumElts)
26724                                ? CurrentSVN->getOperand(0)
26725                                : CurrentSVN->getOperand(1);
26726         if (InnerVec.isUndef()) {
26727           Mask.push_back(-1);
26728           continue;
26729         }
26730         InnerIdx %= NumElts;
26731         if (InnerVec == SV0) {
26732           Mask.push_back(InnerIdx);
26733           continue;
26734         }
26735         if (InnerVec == SV1) {
26736           Mask.push_back(InnerIdx + NumElts);
26737           continue;
26738         }
26739       }
26740 
26741       // Bail out if we cannot convert the shuffle pair into a single shuffle.
26742       return false;
26743     }
26744 
26745     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26746       return true;
26747 
26748     // Avoid introducing shuffles with an illegal mask.
26749     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26750     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26751     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26752     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26753     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26754     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26755     if (TLI.isShuffleMaskLegal(Mask, VT))
26756       return true;
26757 
26758     std::swap(SV0, SV1);
26759     ShuffleVectorSDNode::commuteMask(Mask);
26760     return TLI.isShuffleMaskLegal(Mask, VT);
26761   };
26762 
26763   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26764     // Canonicalize shuffles according to rules:
26765     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26766     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26767     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26768     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26769         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
26770       // The incoming shuffle must be of the same type as the result of the
26771       // current shuffle.
26772       assert(N1->getOperand(0).getValueType() == VT &&
26773              "Shuffle types don't match");
26774 
26775       SDValue SV0 = N1->getOperand(0);
26776       SDValue SV1 = N1->getOperand(1);
26777       bool HasSameOp0 = N0 == SV0;
26778       bool IsSV1Undef = SV1.isUndef();
26779       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26780         // Commute the operands of this shuffle so merging below will trigger.
26781         return DAG.getCommutedVectorShuffle(*SVN);
26782     }
26783 
26784     // Canonicalize splat shuffles to the RHS to improve merging below.
26785     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26786     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26787         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26788         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26789         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26790       return DAG.getCommutedVectorShuffle(*SVN);
26791     }
26792 
26793     // Try to fold according to rules:
26794     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26795     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26796     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26797     // Don't try to fold shuffles with illegal type.
26798     // Only fold if this shuffle is the only user of the other shuffle.
26799     // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26800     for (int i = 0; i != 2; ++i) {
26801       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26802           N->isOnlyUserOf(N->getOperand(i).getNode())) {
26803         // The incoming shuffle must be of the same type as the result of the
26804         // current shuffle.
26805         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26806         assert(OtherSV->getOperand(0).getValueType() == VT &&
26807                "Shuffle types don't match");
26808 
26809         SDValue SV0, SV1;
26810         SmallVector<int, 4> Mask;
26811         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26812                               SV0, SV1, Mask)) {
26813           // Check if all indices in Mask are Undef. If so, propagate Undef.
26814           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26815             return DAG.getUNDEF(VT);
26816 
26817           return DAG.getVectorShuffle(VT, SDLoc(N),
26818                                       SV0 ? SV0 : DAG.getUNDEF(VT),
26819                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26820         }
26821       }
26822     }
26823 
26824     // Merge shuffles through binops if we are able to merge them with at least
26825     // one other shuffle.
26826     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26827     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
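          // For example (illustrative values): if only the LHS operand merges,
          // shuffle(bop(shuffle(x,y,M0), z), undef, M1) can become
          // bop(shuffle(x, y, M2), shuffle(z, undef, M1)), where M2 composes M1
          // with M0 via MergeInnerShuffle above.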
26828     unsigned SrcOpcode = N0.getOpcode();
26829     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26830         (N1.isUndef() ||
26831          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26832       // Get binop source ops, or just pass on the undef.
26833       SDValue Op00 = N0.getOperand(0);
26834       SDValue Op01 = N0.getOperand(1);
26835       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26836       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26837       // TODO: We might be able to relax the VT check but we don't currently
26838       // have any isBinOp() that has different result/ops VTs so play safe until
26839       // we have test coverage.
26840       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26841           Op01.getValueType() == VT && Op11.getValueType() == VT &&
26842           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26843            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26844            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26845            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26846         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26847                                         SmallVectorImpl<int> &Mask, bool LeftOp,
26848                                         bool Commute) {
26849           SDValue InnerN = Commute ? N1 : N0;
26850           SDValue Op0 = LeftOp ? Op00 : Op01;
26851           SDValue Op1 = LeftOp ? Op10 : Op11;
26852           if (Commute)
26853             std::swap(Op0, Op1);
26854           // Only accept the merged shuffle if we don't introduce undef elements,
26855           // or the inner shuffle already contained undef elements.
26856           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26857           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26858                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26859                                    Mask) &&
26860                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26861                   llvm::none_of(Mask, [](int M) { return M < 0; }));
26862         };
26863 
26864         // Ensure we don't increase the number of shuffles - we must merge a
26865         // shuffle from at least one of the LHS and RHS ops.
26866         bool MergedLeft = false;
26867         SDValue LeftSV0, LeftSV1;
26868         SmallVector<int, 4> LeftMask;
26869         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26870             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26871           MergedLeft = true;
26872         } else {
26873           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26874           LeftSV0 = Op00, LeftSV1 = Op10;
26875         }
26876 
26877         bool MergedRight = false;
26878         SDValue RightSV0, RightSV1;
26879         SmallVector<int, 4> RightMask;
26880         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26881             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26882           MergedRight = true;
26883         } else {
26884           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26885           RightSV0 = Op01, RightSV1 = Op11;
26886         }
26887 
26888         if (MergedLeft || MergedRight) {
26889           SDLoc DL(N);
26890           SDValue LHS = DAG.getVectorShuffle(
26891               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26892               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26893           SDValue RHS = DAG.getVectorShuffle(
26894               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26895               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26896           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26897         }
26898       }
26899     }
26900   }
26901 
26902   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26903     return V;
26904 
26905   // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26906   // Perform this really late, because it could eliminate knowledge
26907   // of undef elements created by this shuffle.
26908   if (Level < AfterLegalizeTypes)
26909     if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26910                                                           LegalOperations))
26911       return V;
26912 
26913   return SDValue();
26914 }
26915 
26916 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26917   EVT VT = N->getValueType(0);
26918   if (!VT.isFixedLengthVector())
26919     return SDValue();
26920 
26921   // Try to convert a scalar binop with an extracted vector element to a vector
26922   // binop. This is intended to reduce potentially expensive register moves.
26923   // TODO: Check if both operands are extracted.
26924   // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26925   // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26926   SDValue Scalar = N->getOperand(0);
26927   unsigned Opcode = Scalar.getOpcode();
26928   EVT VecEltVT = VT.getScalarType();
26929   if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26930       TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26931       Scalar.getOperand(0).getValueType() == VecEltVT &&
26932       Scalar.getOperand(1).getValueType() == VecEltVT &&
26933       Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26934       Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26935       DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26936     // Match an extract element and get a shuffle mask equivalent.
26937     SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26938 
26939     for (int i : {0, 1}) {
26940       // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26941       // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
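            // e.g. (illustrative) s2v (add (extelt v4i32 V, 2), 7)
            //        --> shuffle (add V, splat(7)), undef, {2,-1,-1,-1}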
26942       SDValue EE = Scalar.getOperand(i);
26943       auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26944       if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26945           EE.getOperand(0).getValueType() == VT &&
26946           isa<ConstantSDNode>(EE.getOperand(1))) {
26947         // Mask = {ExtractIndex, undef, undef....}
26948         ShufMask[0] = EE.getConstantOperandVal(1);
26949         // Make sure the shuffle is legal if we are crossing lanes.
26950         if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26951           SDLoc DL(N);
26952           SDValue V[] = {EE.getOperand(0),
26953                          DAG.getConstant(C->getAPIntValue(), DL, VT)};
26954           SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26955           return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26956                                       ShufMask);
26957         }
26958       }
26959     }
26960   }
26961 
26962   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26963   // with a VECTOR_SHUFFLE and possible truncate.
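        // For example (illustrative values): s2v (extelt v4i32 V, 3) becomes
        // shuffle(V, undef, {3,-1,-1,-1}), followed by an extract_subvector at
        // index 0 if the result type has fewer elements than V.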
26964   if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26965       !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26966     return SDValue();
26967 
26968   // If we have an implicit truncate, truncate here if it is legal.
26969   if (VecEltVT != Scalar.getValueType() &&
26970       Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26971     SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26972     return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26973   }
26974 
26975   auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26976   if (!ExtIndexC)
26977     return SDValue();
26978 
26979   SDValue SrcVec = Scalar.getOperand(0);
26980   EVT SrcVT = SrcVec.getValueType();
26981   unsigned SrcNumElts = SrcVT.getVectorNumElements();
26982   unsigned VTNumElts = VT.getVectorNumElements();
26983   if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26984     // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26985     SmallVector<int, 8> Mask(SrcNumElts, -1);
26986     Mask[0] = ExtIndexC->getZExtValue();
26987     SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26988         SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26989     if (!LegalShuffle)
26990       return SDValue();
26991 
26992     // If the initial vector is the same size, the shuffle is the result.
26993     if (VT == SrcVT)
26994       return LegalShuffle;
26995 
26996     // If not, shorten the shuffled vector.
26997     if (VTNumElts != SrcNumElts) {
26998       SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26999       EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
27000                                    SrcVT.getVectorElementType(), VTNumElts);
27001       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
27002                          ZeroIdx);
27003     }
27004   }
27005 
27006   return SDValue();
27007 }
27008 
27009 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
27010   EVT VT = N->getValueType(0);
27011   SDValue N0 = N->getOperand(0);
27012   SDValue N1 = N->getOperand(1);
27013   SDValue N2 = N->getOperand(2);
27014   uint64_t InsIdx = N->getConstantOperandVal(2);
27015 
27016   // If inserting an UNDEF, just return the original vector.
27017   if (N1.isUndef())
27018     return N0;
27019 
27020   // If this is an insert of an extracted vector into an undef vector, we can
27021   // just use the input to the extract if the types match, and can simplify
27022   // in some cases even if they don't.
27023   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27024       N1.getOperand(1) == N2) {
27025     EVT SrcVT = N1.getOperand(0).getValueType();
27026     if (SrcVT == VT)
27027       return N1.getOperand(0);
27028     // TODO: To remove the zero check, we would need to adjust the offset to
27029     // a multiple of the new src type.
27030     if (isNullConstant(N2)) {
27031       if (VT.knownBitsGE(SrcVT) &&
27032           !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
27033         return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27034                            VT, N0, N1.getOperand(0), N2);
27035       else if (VT.knownBitsLE(SrcVT) &&
27036                !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
27037         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
27038                            VT, N1.getOperand(0), N2);
27039     }
27040   }
27041 
27042   // Handle case where we've ended up inserting back into the source vector
27043   // we extracted the subvector from.
27044   // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27045   if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
27046       N1.getOperand(1) == N2)
27047     return N0;
27048 
27049   // Simplify scalar inserts into an undef vector:
27050   // insert_subvector undef, (splat X), N2 -> splat X
27051   if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
27052     if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
27053       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
27054 
27055   // If we are inserting a bitcast value into an undef, with the same
27056   // number of elements, just use the bitcast input of the extract.
27057   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27058   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
27059   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
27060       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27061       N1.getOperand(0).getOperand(1) == N2 &&
27062       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
27063           VT.getVectorElementCount() &&
27064       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
27065           VT.getSizeInBits()) {
27066     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
27067   }
27068 
27069   // If both N0 and N1 are bitcast values on which insert_subvector
27070   // would make sense, pull the bitcast through.
27071   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27072   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
27073   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
27074     SDValue CN0 = N0.getOperand(0);
27075     SDValue CN1 = N1.getOperand(0);
27076     EVT CN0VT = CN0.getValueType();
27077     EVT CN1VT = CN1.getValueType();
27078     if (CN0VT.isVector() && CN1VT.isVector() &&
27079         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
27080         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
27081       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27082                                       CN0.getValueType(), CN0, CN1, N2);
27083       return DAG.getBitcast(VT, NewINSERT);
27084     }
27085   }
27086 
27087   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
27088   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
27089   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27090   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27091       N0.getOperand(1).getValueType() == N1.getValueType() &&
27092       N0.getOperand(2) == N2)
27093     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
27094                        N1, N2);
27095 
27096   // Eliminate an intermediate insert into an undef vector:
27097   // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27098   // insert_subvector undef, X, 0
27099   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
27100       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
27101       isNullConstant(N2))
27102     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
27103                        N1.getOperand(1), N2);
27104 
27105   // Push subvector bitcasts to the output, adjusting the index as we go.
27106   // insert_subvector(bitcast(v), bitcast(s), c1)
27107   // -> bitcast(insert_subvector(v, s, c2))
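        // For example (illustrative values): inserting a v2i32 bitcast of a v1i64
        // value at index 2 of a v8i32 vector becomes an insert of the v1i64 at
        // index 1 of the v4i64 bitcast of the base vector (Scale = 64/32 = 2).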
27108   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
27109       N1.getOpcode() == ISD::BITCAST) {
27110     SDValue N0Src = peekThroughBitcasts(N0);
27111     SDValue N1Src = peekThroughBitcasts(N1);
27112     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
27113     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
27114     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
27115         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
27116       EVT NewVT;
27117       SDLoc DL(N);
27118       SDValue NewIdx;
27119       LLVMContext &Ctx = *DAG.getContext();
27120       ElementCount NumElts = VT.getVectorElementCount();
27121       unsigned EltSizeInBits = VT.getScalarSizeInBits();
27122       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
27123         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
27124         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
27125         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
27126       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
27127         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
27128         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
27129           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
27130                                    NumElts.divideCoefficientBy(Scale));
27131           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
27132         }
27133       }
27134       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
27135         SDValue Res = DAG.getBitcast(NewVT, N0Src);
27136         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
27137         return DAG.getBitcast(VT, Res);
27138       }
27139     }
27140   }
27141 
27142   // Canonicalize insert_subvector dag nodes.
27143   // Example:
27144   // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
27145   // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0), if Idx1 < Idx0
27146   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
27147       N1.getValueType() == N0.getOperand(1).getValueType()) {
27148     unsigned OtherIdx = N0.getConstantOperandVal(2);
27149     if (InsIdx < OtherIdx) {
27150       // Swap nodes.
27151       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
27152                                   N0.getOperand(0), N1, N2);
27153       AddToWorklist(NewOp.getNode());
27154       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
27155                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
27156     }
27157   }
27158 
27159   // If the input vector is a concatenation, and the insert replaces
27160   // one of the pieces, we can optimize into a single concat_vectors.
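        // For example (illustrative values): inserting a v2i32 subvector at index 4
        // into concat(A, B, C, D) of v2i32 pieces replaces C, since
        // InsIdx / Factor = 4 / 2 = 2.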
27161   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
27162       N0.getOperand(0).getValueType() == N1.getValueType() &&
27163       N0.getOperand(0).getValueType().isScalableVector() ==
27164           N1.getValueType().isScalableVector()) {
27165     unsigned Factor = N1.getValueType().getVectorMinNumElements();
27166     SmallVector<SDValue, 8> Ops(N0->ops());
27167     Ops[InsIdx / Factor] = N1;
27168     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
27169   }
27170 
27171   // Simplify source operands based on insertion.
27172   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27173     return SDValue(N, 0);
27174 
27175   return SDValue();
27176 }
27177 
27178 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
27179   SDValue N0 = N->getOperand(0);
27180 
27181   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
27182   if (N0->getOpcode() == ISD::FP16_TO_FP)
27183     return N0->getOperand(0);
27184 
27185   return SDValue();
27186 }
27187 
27188 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
27189   auto Op = N->getOpcode();
27190   assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
27191          "opcode should be FP16_TO_FP or BF16_TO_FP.");
27192   SDValue N0 = N->getOperand(0);
27193 
27194   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
27195   // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27196   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
27197     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
27198     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
27199       return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
27200     }
27201   }
27202 
27203   // Sometimes constants manage to survive very late in the pipeline, e.g.,
27204   // because they are wrapped inside the <1 x f16> type. Try one last time to
27205   // get rid of them.
27206   SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
27207                                               N->getValueType(0), {N0});
27208   return Folded;
27209 }
27210 
27211 SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
27212   SDValue N0 = N->getOperand(0);
27213 
27214   // fold (fp_to_bf16 (bf16_to_fp op)) -> op
27215   if (N0->getOpcode() == ISD::BF16_TO_FP)
27216     return N0->getOperand(0);
27217 
27218   return SDValue();
27219 }
27220 
27221 SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
27222   // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27223   return visitFP16_TO_FP(N);
27224 }
27225 
27226 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
27227   SDValue N0 = N->getOperand(0);
27228   EVT VT = N0.getValueType();
27229   unsigned Opcode = N->getOpcode();
27230 
27231   // VECREDUCE over 1-element vector is just an extract.
27232   if (VT.getVectorElementCount().isScalar()) {
27233     SDLoc dl(N);
27234     SDValue Res =
27235         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
27236                     DAG.getVectorIdxConstant(0, dl));
27237     if (Res.getValueType() != N->getValueType(0))
27238       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
27239     return Res;
27240   }
27241 
27242   // On a boolean vector an and/or reduction is the same as a umin/umax
27243   // reduction. Convert them if the latter is legal while the former isn't.
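        // With all-sign-bit elements (each lane 0 or -1), and(x,y) == umin(x,y)
        // and or(x,y) == umax(x,y): e.g. and(-1, 0) == 0 == umin(0xff..f, 0).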
27244   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
27245     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
27246         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
27247     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
27248         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
27249         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
27250       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
27251   }
27252 
27253   // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
27254   // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
27255   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27256       TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
27257     SDValue Vec = N0.getOperand(0);
27258     SDValue Subvec = N0.getOperand(1);
27259     if ((Opcode == ISD::VECREDUCE_OR &&
27260          (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
27261         (Opcode == ISD::VECREDUCE_AND &&
27262          (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
27263       return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
27264   }
27265 
27266   // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
27267   // Same for zext and anyext, and for and/or/xor reductions.
27268   if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
27269        Opcode == ISD::VECREDUCE_XOR) &&
27270       (N0.getOpcode() == ISD::SIGN_EXTEND ||
27271        N0.getOpcode() == ISD::ZERO_EXTEND ||
27272        N0.getOpcode() == ISD::ANY_EXTEND) &&
27273       TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
27274     SDValue Red = DAG.getNode(Opcode, SDLoc(N),
27275                               N0.getOperand(0).getValueType().getScalarType(),
27276                               N0.getOperand(0));
27277     return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
27278   }
27279   return SDValue();
27280 }
27281 
27282 SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
27283   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
27284 
27285   // FSUB -> FMA combines:
27286   if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
27287     AddToWorklist(Fused.getNode());
27288     return Fused;
27289   }
27290   return SDValue();
27291 }
27292 
27293 SDValue DAGCombiner::visitVPOp(SDNode *N) {
27294 
27295   if (N->getOpcode() == ISD::VP_GATHER)
27296     if (SDValue SD = visitVPGATHER(N))
27297       return SD;
27298 
27299   if (N->getOpcode() == ISD::VP_SCATTER)
27300     if (SDValue SD = visitVPSCATTER(N))
27301       return SD;
27302 
27303   if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
27304     if (SDValue SD = visitVP_STRIDED_LOAD(N))
27305       return SD;
27306 
27307   if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
27308     if (SDValue SD = visitVP_STRIDED_STORE(N))
27309       return SD;
27310 
27311   // VP operations in which all vector elements are disabled - either by
27312   // determining that the mask is all false or that the EVL is 0 - can be
27313   // eliminated.
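        // For example (illustrative): a vp.add whose EVL operand is the constant 0,
        // or whose mask operand is an all-zeros splat, computes no lanes and is
        // folded away below.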
27314   bool AreAllEltsDisabled = false;
27315   if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
27316     AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
27317   if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
27318     AreAllEltsDisabled |=
27319         ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
27320 
27321   // This is the only generic VP combine we support for now.
27322   if (!AreAllEltsDisabled) {
27323     switch (N->getOpcode()) {
27324     case ISD::VP_FADD:
27325       return visitVP_FADD(N);
27326     case ISD::VP_FSUB:
27327       return visitVP_FSUB(N);
27328     case ISD::VP_FMA:
27329       return visitFMA<VPMatchContext>(N);
27330     case ISD::VP_SELECT:
27331       return visitVP_SELECT(N);
27332     case ISD::VP_MUL:
27333       return visitMUL<VPMatchContext>(N);
27334     case ISD::VP_SUB:
27335       return foldSubCtlzNot<VPMatchContext>(N, DAG);
27336     default:
27337       break;
27338     }
27339     return SDValue();
27340   }
27341 
27342   // Binary operations can be replaced by UNDEF.
27343   if (ISD::isVPBinaryOp(N->getOpcode()))
27344     return DAG.getUNDEF(N->getValueType(0));
27345 
27346   // VP Memory operations can be replaced by either the chain (stores) or the
27347   // chain + undef (loads).
27348   if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
27349     if (MemSD->writeMem())
27350       return MemSD->getChain();
27351     return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
27352   }
27353 
27354   // Reduction operations return the start operand when no elements are active.
27355   if (ISD::isVPReduction(N->getOpcode()))
27356     return N->getOperand(0);
27357 
27358   return SDValue();
27359 }
27360 
27361 SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
27362   SDValue Chain = N->getOperand(0);
27363   SDValue Ptr = N->getOperand(1);
27364   EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27365 
27366   // Check if the memory to which the FP state is written is used only in a
27367   // single load operation.
27368   LoadSDNode *LdNode = nullptr;
27369   for (auto *U : Ptr->users()) {
27370     if (U == N)
27371       continue;
27372     if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
27373       if (LdNode && LdNode != Ld)
27374         return SDValue();
27375       LdNode = Ld;
27376       continue;
27377     }
27378     return SDValue();
27379   }
27380   if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27381       !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27382       !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
27383     return SDValue();
27384 
27385   // Check if the loaded value is used only in a store operation.
27386   StoreSDNode *StNode = nullptr;
27387   for (SDUse &U : LdNode->uses()) {
27388     if (U.getResNo() == 0) {
27389       if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
27390         if (StNode)
27391           return SDValue();
27392         StNode = St;
27393       } else {
27394         return SDValue();
27395       }
27396     }
27397   }
27398   if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27399       !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27400       !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27401     return SDValue();
27402 
27403   // Create new node GET_FPENV_MEM, which uses the store address to write FP
27404   // environment.
27405   SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
27406                                 StNode->getMemOperand());
27407   CombineTo(StNode, Res, false);
27408   return Res;
27409 }
27410 
27411 SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
27412   SDValue Chain = N->getOperand(0);
27413   SDValue Ptr = N->getOperand(1);
27414   EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27415 
27416   // Check if the address of the FP state is used only in a single store operation.
27417   StoreSDNode *StNode = nullptr;
27418   for (auto *U : Ptr->users()) {
27419     if (U == N)
27420       continue;
27421     if (auto *St = dyn_cast<StoreSDNode>(U)) {
27422       if (StNode && StNode != St)
27423         return SDValue();
27424       StNode = St;
27425       continue;
27426     }
27427     return SDValue();
27428   }
27429   if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27430       !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27431       !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
27432     return SDValue();
27433 
27434   // Check if the stored value is loaded from some location and the loaded
27435   // value is used only in the store operation.
27436   SDValue StValue = StNode->getValue();
27437   auto *LdNode = dyn_cast<LoadSDNode>(StValue);
27438   if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27439       !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27440       !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27441     return SDValue();
27442 
27443   // Create new node SET_FPENV_MEM, which uses the load address to read FP
27444   // environment.
27445   SDValue Res =
27446       DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
27447                       LdNode->getMemOperand());
27448   return Res;
27449 }
27450 
27451 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
27452 /// with the destination vector and a zero vector.
27453 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
27454 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
27455 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
27456   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
27457 
27458   EVT VT = N->getValueType(0);
27459   SDValue LHS = N->getOperand(0);
27460   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
27461   SDLoc DL(N);
27462 
27463   // Make sure we're not running after operation legalization where it
27464   // may have custom lowered the vector shuffles.
27465   if (LegalOperations)
27466     return SDValue();
27467 
27468   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
27469     return SDValue();
27470 
27471   EVT RVT = RHS.getValueType();
27472   unsigned NumElts = RHS.getNumOperands();
27473 
27474   // Attempt to create a valid clear mask by splitting the mask into
27475   // sub-elements and checking that each is
27476   // all zeros or all ones - suitable for shuffle masking.
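        // For example (illustrative values): for RHS = v2i64
        // <0xffffffff00000000, -1> with Split = 2, the i32 sub-elements are
        // <0, -1, -1, -1> (little-endian), giving the clear mask {4, 1, 2, 3}.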
27477   auto BuildClearMask = [&](int Split) {
27478     int NumSubElts = NumElts * Split;
27479     int NumSubBits = RVT.getScalarSizeInBits() / Split;
27480 
27481     SmallVector<int, 8> Indices;
27482     for (int i = 0; i != NumSubElts; ++i) {
27483       int EltIdx = i / Split;
27484       int SubIdx = i % Split;
27485       SDValue Elt = RHS.getOperand(EltIdx);
27486       // X & undef --> 0 (not undef). So this lane must be converted to choose
27487       // from the zero constant vector (same as if the element had all 0-bits).
27488       if (Elt.isUndef()) {
27489         Indices.push_back(i + NumSubElts);
27490         continue;
27491       }
27492 
27493       APInt Bits;
27494       if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
27495         Bits = Cst->getAPIntValue();
27496       else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
27497         Bits = CstFP->getValueAPF().bitcastToAPInt();
27498       else
27499         return SDValue();
27500 
27501       // Extract the sub element from the constant bit mask.
27502       if (DAG.getDataLayout().isBigEndian())
27503         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
27504       else
27505         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
27506 
27507       if (Bits.isAllOnes())
27508         Indices.push_back(i);
27509       else if (Bits == 0)
27510         Indices.push_back(i + NumSubElts);
27511       else
27512         return SDValue();
27513     }
27514 
27515     // Let's see if the target supports this vector_shuffle.
27516     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
27517     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
27518     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
27519       return SDValue();
27520 
27521     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
27522     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
27523                                                    DAG.getBitcast(ClearVT, LHS),
27524                                                    Zero, Indices));
27525   };
27526 
27527   // Determine maximum split level (byte level masking).
27528   int MaxSplit = 1;
27529   if (RVT.getScalarSizeInBits() % 8 == 0)
27530     MaxSplit = RVT.getScalarSizeInBits() / 8;
27531 
27532   for (int Split = 1; Split <= MaxSplit; ++Split)
27533     if (RVT.getScalarSizeInBits() % Split == 0)
27534       if (SDValue S = BuildClearMask(Split))
27535         return S;
27536 
27537   return SDValue();
27538 }
27539 
27540 /// If a vector binop is performed on splat values, it may be profitable to
27541 /// extract, scalarize, and insert/splat.
27542 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
27543                                       const SDLoc &DL, bool LegalTypes) {
27544   SDValue N0 = N->getOperand(0);
27545   SDValue N1 = N->getOperand(1);
27546   unsigned Opcode = N->getOpcode();
27547   EVT VT = N->getValueType(0);
27548   EVT EltVT = VT.getVectorElementType();
27549   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27550 
27551   // TODO: Remove/replace the extract cost check? If the elements are available
27552   //       as scalars, then there may be no extract cost. Should we ask if
27553   //       inserting a scalar back into a vector is cheap instead?
27554   int Index0, Index1;
27555   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27556   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
27557   // Extracting an element from a splat_vector should be free.
27558   // TODO: use DAG.isSplatValue instead?
27559   bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
27560                            N1.getOpcode() == ISD::SPLAT_VECTOR;
27561   if (!Src0 || !Src1 || Index0 != Index1 ||
27562       Src0.getValueType().getVectorElementType() != EltVT ||
27563       Src1.getValueType().getVectorElementType() != EltVT ||
27564       !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
27565       // If before type legalization, allow scalar types that will eventually be
27566       // made legal.
27567       !TLI.isOperationLegalOrCustom(
27568           Opcode, LegalTypes
27569                       ? EltVT
27570                       : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
27571     return SDValue();
27572 
27573   // FIXME: Type legalization can't handle illegal MULHS/MULHU.
27574   if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
27575     return SDValue();
27576 
27577   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
27578     // All but one element should have an undef input, which will fold to a
27579     // constant or undef. Avoid splatting which would over-define potentially
27580     // undefined elements.
27581 
27582     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27583     //   build_vec ..undef, (bo X, Y), undef...
27584     SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
27585     DAG.ExtractVectorElements(Src0, EltsX);
27586     DAG.ExtractVectorElements(Src1, EltsY);
27587 
27588     for (auto [X, Y] : zip(EltsX, EltsY))
27589       EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
27590     return DAG.getBuildVector(VT, DL, EltsResult);
27591   }
27592 
27593   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27594   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
27595   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
27596   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27597 
27598   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27599   return DAG.getSplat(VT, DL, ScalarBO);
27600 }
27601 
27602 /// Visit a vector cast operation, like FP_EXTEND.
27603 SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
27604   EVT VT = N->getValueType(0);
27605   assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
27606   EVT EltVT = VT.getVectorElementType();
27607   unsigned Opcode = N->getOpcode();
27608 
27609   SDValue N0 = N->getOperand(0);
27610   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27611 
27612   // TODO: promoting the operation might also be good here?
27613   int Index0;
27614   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27615   if (Src0 &&
27616       (N0.getOpcode() == ISD::SPLAT_VECTOR ||
27617        TLI.isExtractVecEltCheap(VT, Index0)) &&
27618       TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
27619       TLI.preferScalarizeSplat(N)) {
27620     EVT SrcVT = N0.getValueType();
27621     EVT SrcEltVT = SrcVT.getVectorElementType();
27622     SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27623     SDValue Elt =
27624         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
27625     SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27626     if (VT.isScalableVector())
27627       return DAG.getSplatVector(VT, DL, ScalarBO);
27628     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
27629     return DAG.getBuildVector(VT, DL, Ops);
27630   }
27631 
27632   return SDValue();
27633 }
27634 
27635 /// Visit a binary vector operation, like ADD.
27636 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
27637   EVT VT = N->getValueType(0);
27638   assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
27639 
27640   SDValue LHS = N->getOperand(0);
27641   SDValue RHS = N->getOperand(1);
27642   unsigned Opcode = N->getOpcode();
27643   SDNodeFlags Flags = N->getFlags();
27644 
27645   // Move unary shuffles with identical masks after a vector binop:
27646   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
27647   //   --> shuffle (VBinOp A, B), Undef, Mask
27648   // This does not require type legality checks because we are creating the
27649   // same types of operations that are in the original sequence. We do have to
27650   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27651   // though. This code is adapted from the identical transform in instcombine.
27652   if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
27653     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
27654     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
27655     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27656         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
27657         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
27658       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
27659                                      RHS.getOperand(0), Flags);
27660       SDValue UndefV = LHS.getOperand(1);
27661       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27662     }
27663 
27664     // Try to sink a splat shuffle after a binop with a uniform constant.
27665     // This is limited to cases where neither the shuffle nor the constant have
27666     // undefined elements because that could be poison-unsafe or inhibit
27667     // demanded elements analysis. It is further limited to not change a splat
27668     // of an inserted scalar because that may be optimized better by
27669     // load-folding or other target-specific behaviors.
27670     if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27671         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27672         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27673       // binop (splat X), (splat C) --> splat (binop X, C)
27674       SDValue X = Shuf0->getOperand(0);
27675       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
27676       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27677                                   Shuf0->getMask());
27678     }
27679     if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27680         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27681         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27682       // binop (splat C), (splat X) --> splat (binop C, X)
27683       SDValue X = Shuf1->getOperand(0);
27684       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27685       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27686                                   Shuf1->getMask());
27687     }
27688   }
27689 
27690   // The following pattern is likely to emerge with vector reduction ops. Moving
27691   // the binary operation ahead of insertion may allow using a narrower vector
27692   // instruction that has better performance than the wide version of the op:
27693   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27694   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27695       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27696       LHS.getOperand(2) == RHS.getOperand(2) &&
27697       (LHS.hasOneUse() || RHS.hasOneUse())) {
27698     SDValue X = LHS.getOperand(1);
27699     SDValue Y = RHS.getOperand(1);
27700     SDValue Z = LHS.getOperand(2);
27701     EVT NarrowVT = X.getValueType();
27702     if (NarrowVT == Y.getValueType() &&
27703         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27704                                               LegalOperations)) {
27705       // (binop undef, undef) may not return undef, so compute that result.
27706       SDValue VecC =
27707           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27708       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27709       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27710     }
27711   }
27712 
27713   // Make sure all but the first op are undef or constant.
27714   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27715     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27716            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27717              return Op.isUndef() ||
27718                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27719            });
27720   };
27721 
27722   // The following pattern is likely to emerge with vector reduction ops. Moving
27723   // the binary operation ahead of the concat may allow using a narrower vector
27724   // instruction that has better performance than the wide version of the op:
27725   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27726   //   concat (VBinOp X, Y), VecC
27727   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27728       (LHS.hasOneUse() || RHS.hasOneUse())) {
27729     EVT NarrowVT = LHS.getOperand(0).getValueType();
27730     if (NarrowVT == RHS.getOperand(0).getValueType() &&
27731         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27732       unsigned NumOperands = LHS.getNumOperands();
27733       SmallVector<SDValue, 4> ConcatOps;
27734       for (unsigned i = 0; i != NumOperands; ++i) {
27735         // This constant-folds for operands 1 and up.
27736         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27737                                         RHS.getOperand(i)));
27738       }
27739 
27740       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27741     }
27742   }
27743 
27744   if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
27745     return V;
27746 
27747   return SDValue();
27748 }
27749 
27750 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27751                                     SDValue N2) {
27752   assert(N0.getOpcode() == ISD::SETCC &&
27753          "First argument must be a SetCC node!");
27754 
27755   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27756                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
27757 
27758   // If we got a simplified select_cc node back from SimplifySelectCC, then
27759   // break it down into a new SETCC node, and a new SELECT node, and then return
27760   // the SELECT node, since we were called with a SELECT node.
27761   if (SCC.getNode()) {
27762     // Check to see if we got a select_cc back (to turn into setcc/select).
27763     // Otherwise, just return whatever node we got back, like fabs.
27764     if (SCC.getOpcode() == ISD::SELECT_CC) {
27765       const SDNodeFlags Flags = N0->getFlags();
27766       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27767                                   N0.getValueType(),
27768                                   SCC.getOperand(0), SCC.getOperand(1),
27769                                   SCC.getOperand(4), Flags);
27770       AddToWorklist(SETCC.getNode());
27771       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27772                                          SCC.getOperand(2), SCC.getOperand(3));
27773       SelectNode->setFlags(Flags);
27774       return SelectNode;
27775     }
27776 
27777     return SCC;
27778   }
27779   return SDValue();
27780 }
27781 
27782 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27783 /// being selected between, see if we can simplify the select.  Callers of this
27784 /// should assume that TheSelect is deleted if this returns true.  As such, they
27785 /// should return the appropriate thing (e.g. the node) back to the top-level of
27786 /// the DAG combiner loop to avoid it being looked at.
27787 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27788                                     SDValue RHS) {
27789   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27790   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
27791   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27792     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27793       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27794       SDValue Sqrt = RHS;
27795       ISD::CondCode CC;
27796       SDValue CmpLHS;
27797       const ConstantFPSDNode *Zero = nullptr;
27798 
27799       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27800         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27801         CmpLHS = TheSelect->getOperand(0);
27802         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27803       } else {
27804         // SELECT or VSELECT
27805         SDValue Cmp = TheSelect->getOperand(0);
27806         if (Cmp.getOpcode() == ISD::SETCC) {
27807           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27808           CmpLHS = Cmp.getOperand(0);
27809           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27810         }
27811       }
27812       if (Zero && Zero->isZero() &&
27813           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27814           CC == ISD::SETULT || CC == ISD::SETLT)) {
27815         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27816         CombineTo(TheSelect, Sqrt);
27817         return true;
27818       }
27819     }
27820   }
27821   // Cannot simplify select with vector condition
27822   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27823 
27824   // If this is a select from two identical things, try to pull the operation
27825   // through the select.
27826   if (LHS.getOpcode() != RHS.getOpcode() ||
27827       !LHS.hasOneUse() || !RHS.hasOneUse())
27828     return false;
27829 
27830   // If this is a load and the token chain is identical, replace the select
27831   // of two loads with a load through a select of the address to load from.
27832   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27833   // constants have been dropped into the constant pool.
27834   if (LHS.getOpcode() == ISD::LOAD) {
27835     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27836     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27837 
27838     // Token chains must be identical.
27839     if (LHS.getOperand(0) != RHS.getOperand(0) ||
27840         // Do not let this transformation reduce the number of volatile loads.
27841         // Be conservative for atomics for the moment
27842         // TODO: This does appear to be legal for unordered atomics (see D66309)
27843         !LLD->isSimple() || !RLD->isSimple() ||
27844         // FIXME: If either is a pre/post inc/dec load,
27845         // we'd need to split out the address adjustment.
27846         LLD->isIndexed() || RLD->isIndexed() ||
27847         // If this is an EXTLOAD, the VT's must match.
27848         LLD->getMemoryVT() != RLD->getMemoryVT() ||
27849         // If this is an EXTLOAD, the kind of extension must match.
27850         (LLD->getExtensionType() != RLD->getExtensionType() &&
27851          // The only exception is if one of the extensions is anyext.
27852          LLD->getExtensionType() != ISD::EXTLOAD &&
27853          RLD->getExtensionType() != ISD::EXTLOAD) ||
27854         // FIXME: this discards src value information.  This is
27855         // over-conservative. It would be beneficial to be able to remember
27856         // both potential memory locations.  Since we are discarding
27857         // src value info, don't do the transformation if the memory
27858         // locations are not in the default address space.
27859         LLD->getPointerInfo().getAddrSpace() != 0 ||
27860         RLD->getPointerInfo().getAddrSpace() != 0 ||
27861         // We can't produce a CMOV of a TargetFrameIndex since we won't
27862         // generate the address generation required.
27863         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27864         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27865         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27866                                       LLD->getBasePtr().getValueType()))
27867       return false;
27868 
27869     // The loads must not depend on one another.
27870     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27871       return false;
27872 
27873     // Check that the select condition doesn't reach either load.  If so,
27874     // folding this will induce a cycle into the DAG.  If not, this is safe to
27875     // xform, so create a select of the addresses.
27876 
27877     SmallPtrSet<const SDNode *, 32> Visited;
27878     SmallVector<const SDNode *, 16> Worklist;
27879 
27880     // Always fail if LLD and RLD are not independent. TheSelect is a
27881     // predecessor to all Nodes in question so we need not search past it.
27882 
27883     Visited.insert(TheSelect);
27884     Worklist.push_back(LLD);
27885     Worklist.push_back(RLD);
27886 
27887     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27888         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27889       return false;
27890 
27891     SDValue Addr;
27892     if (TheSelect->getOpcode() == ISD::SELECT) {
27893       // We cannot do this optimization if any pair of {RLD, LLD} is a
27894       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27895       // Loads, we only need to check if CondNode is a successor to one of the
27896       // loads. We can further avoid this if there's no use of their chain
27897       // value.
27898       SDNode *CondNode = TheSelect->getOperand(0).getNode();
27899       Worklist.push_back(CondNode);
27900 
27901       if ((LLD->hasAnyUseOfValue(1) &&
27902            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27903           (RLD->hasAnyUseOfValue(1) &&
27904            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27905         return false;
27906 
27907       Addr = DAG.getSelect(SDLoc(TheSelect),
27908                            LLD->getBasePtr().getValueType(),
27909                            TheSelect->getOperand(0), LLD->getBasePtr(),
27910                            RLD->getBasePtr());
27911     } else {  // Otherwise SELECT_CC
27912       // We cannot do this optimization if any pair of {RLD, LLD} is a
27913       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27914       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27915       // one of the loads. We can further avoid this if there's no use of their
27916       // chain value.
27917 
27918       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27919       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27920       Worklist.push_back(CondLHS);
27921       Worklist.push_back(CondRHS);
27922 
27923       if ((LLD->hasAnyUseOfValue(1) &&
27924            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27925           (RLD->hasAnyUseOfValue(1) &&
27926            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27927         return false;
27928 
27929       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27930                          LLD->getBasePtr().getValueType(),
27931                          TheSelect->getOperand(0),
27932                          TheSelect->getOperand(1),
27933                          LLD->getBasePtr(), RLD->getBasePtr(),
27934                          TheSelect->getOperand(4));
27935     }
27936 
27937     SDValue Load;
27938     // It is safe to replace the two loads if they have different alignments,
27939     // but the new load must be the minimum (most restrictive) alignment of the
27940     // inputs.
27941     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27942     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27943     if (!RLD->isInvariant())
27944       MMOFlags &= ~MachineMemOperand::MOInvariant;
27945     if (!RLD->isDereferenceable())
27946       MMOFlags &= ~MachineMemOperand::MODereferenceable;
27947     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27948       // FIXME: Discards pointer and AA info.
27949       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27950                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27951                          MMOFlags);
27952     } else {
27953       // FIXME: Discards pointer and AA info.
27954       Load = DAG.getExtLoad(
27955           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27956                                                   : LLD->getExtensionType(),
27957           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27958           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27959     }
27960 
27961     // Users of the select now use the result of the load.
27962     CombineTo(TheSelect, Load);
27963 
27964     // Users of the old loads now use the new load's chain.  We know the
27965     // old-load value is dead now.
27966     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27967     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27968     return true;
27969   }
27970 
27971   return false;
27972 }
27973 
27974 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27975 /// bitwise 'and'.
27976 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27977                                             SDValue N1, SDValue N2, SDValue N3,
27978                                             ISD::CondCode CC) {
27979   // If this is a select where the false operand is zero and the compare is a
27980   // check of the sign bit, see if we can perform the "gzip trick":
27981   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27982   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
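        // Worked example (illustrative, i32): for X = -5, (sra X, 31) is
        // all-ones, so the 'and' with A yields A; for X = 5, (sra X, 31) is 0,
        // so the result is 0 -- matching select_cc setlt X, 0, A, 0.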
27983   EVT XType = N0.getValueType();
27984   EVT AType = N2.getValueType();
27985   if (!isNullConstant(N3) || !XType.bitsGE(AType))
27986     return SDValue();
27987 
27988   // If the comparison is testing for a positive value, we have to invert
27989   // the sign bit mask, so only do that transform if the target has a bitwise
27990   // 'and not' instruction (the invert is free).
27991   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27992     // (X > -1) ? A : 0
27993     // (X >  0) ? X : 0 <-- This is canonical signed max.
27994     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27995       return SDValue();
27996   } else if (CC == ISD::SETLT) {
27997     // (X <  0) ? A : 0
27998     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
27999     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
28000       return SDValue();
28001   } else {
28002     return SDValue();
28003   }
28004 
28005   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28006   // constant.
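        // Illustrative case for the setlt form (i32, A = 4, i.e. bit 2): ShCt
        // is 32 - log2(4) - 1 = 29, and (srl X, 29) moves the sign bit of X
        // down to bit 2, so the 'and' with A yields 4 exactly when X is
        // negative.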
28007   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28008   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28009     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28010     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
28011       SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28012       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
28013       AddToWorklist(Shift.getNode());
28014 
28015       if (XType.bitsGT(AType)) {
28016         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28017         AddToWorklist(Shift.getNode());
28018       }
28019 
28020       if (CC == ISD::SETGT)
28021         Shift = DAG.getNOT(DL, Shift, AType);
28022 
28023       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28024     }
28025   }
28026 
28027   unsigned ShCt = XType.getSizeInBits() - 1;
28028   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
28029     return SDValue();
28030 
28031   SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28032   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
28033   AddToWorklist(Shift.getNode());
28034 
28035   if (XType.bitsGT(AType)) {
28036     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28037     AddToWorklist(Shift.getNode());
28038   }
28039 
28040   if (CC == ISD::SETGT)
28041     Shift = DAG.getNOT(DL, Shift, AType);
28042 
28043   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28044 }
28045 
28046 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28047 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
28048   SDValue N0 = N->getOperand(0);
28049   SDValue N1 = N->getOperand(1);
28050   SDValue N2 = N->getOperand(2);
28051   SDLoc DL(N);
28052 
28053   unsigned BinOpc = N1.getOpcode();
28054   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
28055       (N1.getResNo() != N2.getResNo()))
28056     return SDValue();
28057 
28058   // The use checks are intentionally on SDNode because we may be dealing
28059   // with opcodes that produce more than one SDValue.
28060   // TODO: Do we really need to check N0 (the condition operand of the select)?
28061   //       But removing that clause could cause an infinite loop...
28062   if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28063     return SDValue();
28064 
28065   // Binops may include opcodes that return multiple values, so all values
28066   // must be created/propagated from the newly created binops below.
28067   SDVTList OpVTs = N1->getVTList();
28068 
28069   // Fold select(cond, binop(x, y), binop(z, y))
28070   //  --> binop(select(cond, x, z), y)
28071   if (N1.getOperand(1) == N2.getOperand(1)) {
28072     SDValue N10 = N1.getOperand(0);
28073     SDValue N20 = N2.getOperand(0);
28074     SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
28075     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
28076     NewBinOp->setFlags(N1->getFlags());
28077     NewBinOp->intersectFlagsWith(N2->getFlags());
28078     return SDValue(NewBinOp.getNode(), N1.getResNo());
28079   }
28080 
28081   // Fold select(cond, binop(x, y), binop(x, z))
28082   //  --> binop(x, select(cond, y, z))
28083   if (N1.getOperand(0) == N2.getOperand(0)) {
28084     SDValue N11 = N1.getOperand(1);
28085     SDValue N21 = N2.getOperand(1);
28086     // Second op VT might be different (e.g. shift amount type)
28087     if (N11.getValueType() == N21.getValueType()) {
28088       SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
28089       SDValue NewBinOp =
28090           DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
28091       NewBinOp->setFlags(N1->getFlags());
28092       NewBinOp->intersectFlagsWith(N2->getFlags());
28093       return SDValue(NewBinOp.getNode(), N1.getResNo());
28094     }
28095   }
28096 
28097   // TODO: Handle isCommutativeBinOp patterns as well?
28098   return SDValue();
28099 }
28100 
28101 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
28102 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
28103   SDValue N0 = N->getOperand(0);
28104   EVT VT = N->getValueType(0);
28105   bool IsFabs = N->getOpcode() == ISD::FABS;
28106   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
28107 
28108   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
28109     return SDValue();
28110 
28111   SDValue Int = N0.getOperand(0);
28112   EVT IntVT = Int.getValueType();
28113 
28114   // The operand to cast should be integer.
28115   if (!IntVT.isInteger() || IntVT.isVector())
28116     return SDValue();
28117 
28118   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28119   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
28120   APInt SignMask;
28121   if (N0.getValueType().isVector()) {
28122     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
28123     // 0x7f...) per element and splat it.
28124     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
28125     if (IsFabs)
28126       SignMask = ~SignMask;
28127     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
28128   } else {
28129     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
28130     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
28131     if (IsFabs)
28132       SignMask = ~SignMask;
28133   }
28134   SDLoc DL(N0);
28135   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
28136                     DAG.getConstant(SignMask, DL, IntVT));
28137   AddToWorklist(Int.getNode());
28138   return DAG.getBitcast(VT, Int);
28139 }
28140 
28141 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
28142 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
28143 /// in it. This may be a win when the constant is not otherwise available
28144 /// because it replaces two constant pool loads with one.
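      /// For example (illustrative, f32): "cond ? 1.0f : 2.0f" becomes a constant
      /// pool array holding [2.0f, 1.0f] plus "load (CPIdx + (cond ? 4 : 0))", so
      /// two potential constant pool loads collapse into a single load from a
      /// selected address.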
28145 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
28146     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
28147     ISD::CondCode CC) {
28148   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
28149     return SDValue();
28150 
28151   // If we are before legalize types, we want the other legalization to happen
28152   // first (for example, to avoid messing with soft float).
28153   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
28154   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
28155   EVT VT = N2.getValueType();
28156   if (!TV || !FV || !TLI.isTypeLegal(VT))
28157     return SDValue();
28158 
28159   // If a constant can be materialized without loads, this does not make sense.
28160   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
28161       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
28162       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
28163     return SDValue();
28164 
28165   // If both constants have multiple uses, then we won't need to do an extra
28166   // load. The values are likely around in registers for other users.
28167   if (!TV->hasOneUse() && !FV->hasOneUse())
28168     return SDValue();
28169 
28170   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
28171                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
28172   Type *FPTy = Elts[0]->getType();
28173   const DataLayout &TD = DAG.getDataLayout();
28174 
28175   // Create a ConstantArray of the two constants.
28176   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
28177   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
28178                                       TD.getPrefTypeAlign(FPTy));
28179   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
28180 
28181   // Get offsets to the 0 and 1 elements of the array, so we can select between
28182   // them.
28183   SDValue Zero = DAG.getIntPtrConstant(0, DL);
28184   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
28185   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
28186   SDValue Cond =
28187       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
28188   AddToWorklist(Cond.getNode());
28189   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
28190   AddToWorklist(CstOffset.getNode());
28191   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
28192   AddToWorklist(CPIdx.getNode());
28193   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
28194                      MachinePointerInfo::getConstantPool(
28195                          DAG.getMachineFunction()), Alignment);
28196 }
28197 
28198 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
28199 /// where 'cond' is the comparison specified by CC.
28200 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
28201                                       SDValue N2, SDValue N3, ISD::CondCode CC,
28202                                       bool NotExtCompare) {
28203   // (x ? y : y) -> y.
28204   if (N2 == N3) return N2;
28205 
28206   EVT CmpOpVT = N0.getValueType();
28207   EVT CmpResVT = getSetCCResultType(CmpOpVT);
28208   EVT VT = N2.getValueType();
28209   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
28210   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28211   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
28212 
28213   // Determine if the condition we're dealing with is constant.
28214   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
28215     AddToWorklist(SCC.getNode());
28216     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
28217       // fold select_cc true, x, y -> x
28218       // fold select_cc false, x, y -> y
28219       return !(SCCC->isZero()) ? N2 : N3;
28220     }
28221   }
28222 
28223   if (SDValue V =
28224           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
28225     return V;
28226 
28227   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
28228     return V;
28229 
28230   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
28231   // where y has a single bit set.
28232   // In plain terms: we can turn the SELECT_CC into an AND
28233   // when the condition can be materialized as an all-ones register.  Any
28234   // single bit-test can be materialized as an all-ones register with
28235   // shift-left and shift-right-arith.
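        // Worked example (illustrative, i32): for (and x, 0x08), the shift-left
        // amount is countl_zero(0x08) = 28, which parks bit 3 in the sign bit;
        // the arithmetic shift-right by 31 then smears it into all-ones (bit
        // set) or all-zeros (bit clear), ready for the final 'and'.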
28236   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
28237       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
28238     SDValue AndLHS = N0->getOperand(0);
28239     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
28240     if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
28241       // Shift the tested bit over the sign bit.
28242       const APInt &AndMask = ConstAndRHS->getAPIntValue();
28243       if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
28244         unsigned ShCt = AndMask.getBitWidth() - 1;
28245         SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
28246                                                     SDLoc(AndLHS));
28247         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
28248 
28249         // Now arithmetic right shift it all the way over, so the result is
28250         // either all-ones, or zero.
28251         SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
28252         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
28253 
28254         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
28255       }
28256     }
28257   }
28258 
28259   // fold select C, 16, 0 -> shl C, 4
28260   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
28261   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
28262 
28263   if ((Fold || Swap) &&
28264       TLI.getBooleanContents(CmpOpVT) ==
28265           TargetLowering::ZeroOrOneBooleanContent &&
28266       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
28267 
28268     if (Swap) {
28269       CC = ISD::getSetCCInverse(CC, CmpOpVT);
28270       std::swap(N2C, N3C);
28271     }
28272 
28273     // If the caller doesn't want us to simplify this into a zext of a compare,
28274     // don't do it.
28275     if (NotExtCompare && N2C->isOne())
28276       return SDValue();
28277 
28278     SDValue Temp, SCC;
28279     // zext (setcc n0, n1)
28280     if (LegalTypes) {
28281       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
28282       Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
28283     } else {
28284       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
28285       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
28286     }
28287 
28288     AddToWorklist(SCC.getNode());
28289     AddToWorklist(Temp.getNode());
28290 
28291     if (N2C->isOne())
28292       return Temp;
28293 
28294     unsigned ShCt = N2C->getAPIntValue().logBase2();
28295     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
28296       return SDValue();
28297 
28298     // shl setcc result by log2 n2c
28299     return DAG.getNode(
28300         ISD::SHL, DL, N2.getValueType(), Temp,
28301         DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
28302   }
28303 
28304   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
28305   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
28306   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
28307   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
28308   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
28309   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
28310   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
28311   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
28312   if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
28313     SDValue ValueOnZero = N2;
28314     SDValue Count = N3;
28315     // If the condition is NE instead of E, swap the operands.
28316     if (CC == ISD::SETNE)
28317       std::swap(ValueOnZero, Count);
28318     // Check if the value on zero is a constant equal to the bits in the type.
28319     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
28320       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
28321         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
28322         // legal, combine to just cttz.
28323         if ((Count.getOpcode() == ISD::CTTZ ||
28324              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
28325             N0 == Count.getOperand(0) &&
28326             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
28327           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
28328         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
28329         // legal, combine to just ctlz.
28330         if ((Count.getOpcode() == ISD::CTLZ ||
28331              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
28332             N0 == Count.getOperand(0) &&
28333             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
28334           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
28335       }
28336     }
28337   }
28338 
28339   // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
28340   // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
28341   if (!NotExtCompare && N1C && N2C && N3C &&
28342       N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
28343       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
28344        (N1C->isZero() && CC == ISD::SETLT)) &&
28345       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
28346     SDValue ASR = DAG.getNode(
28347         ISD::SRA, DL, CmpOpVT, N0,
28348         DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
28349     return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
28350                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
28351   }
28352 
28353   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
28354     return S;
28355   if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
28356     return S;
28357   if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
28358     return ABD;
28359 
28360   return SDValue();
28361 }
28362 
28363 /// This is a stub for TargetLowering::SimplifySetCC.
28364 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
28365                                    ISD::CondCode Cond, const SDLoc &DL,
28366                                    bool foldBooleans) {
28367   TargetLowering::DAGCombinerInfo
28368     DagCombineInfo(DAG, Level, false, this);
28369   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
28370 }
28371 
28372 /// Given an ISD::SDIV node expressing a divide by constant, return
28373 /// a DAG expression to select that will generate the same value by multiplying
28374 /// by a magic number.
28375 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
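      /// For example (an illustrative sketch, not necessarily the exact sequence
      /// produced here): a 32-bit sdiv by 3 becomes roughly
      ///   q = mulhs(x, 0x55555556);  // high half of the widened product
      ///   q = q + srl(q, 31);        // truncate toward zero for negative x
      /// replacing the division with a multiply, a shift, and an add.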
28376 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
28377   // When optimising for minimum size, we don't want to expand a div to a mul
28378   // and a shift.
28379   if (DAG.getMachineFunction().getFunction().hasMinSize())
28380     return SDValue();
28381 
28382   SmallVector<SDNode *, 8> Built;
28383   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
28384     for (SDNode *N : Built)
28385       AddToWorklist(N);
28386     return S;
28387   }
28388 
28389   return SDValue();
28390 }
28391 
28392 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
28393 /// DAG expression that will generate the same value by right shifting.
28394 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
28395   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28396   if (!C)
28397     return SDValue();
28398 
28399   // Avoid division by zero.
28400   if (C->isZero())
28401     return SDValue();
28402 
28403   SmallVector<SDNode *, 8> Built;
28404   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
28405     for (SDNode *N : Built)
28406       AddToWorklist(N);
28407     return S;
28408   }
28409 
28410   return SDValue();
28411 }
28412 
28413 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
28414 /// expression that will generate the same value by multiplying by a magic
28415 /// number.
28416 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
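      /// For example (an illustrative sketch): a 32-bit udiv by 3 becomes roughly
      ///   q = srl(mulhu(x, 0xAAAAAAAB), 1);
      /// i.e. multiply by the magic constant ceil(2^33 / 3), keep the high 32
      /// bits, and shift right once.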
28417 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
28418   // When optimising for minimum size, we don't want to expand a div to a mul
28419   // and a shift.
28420   if (DAG.getMachineFunction().getFunction().hasMinSize())
28421     return SDValue();
28422 
28423   SmallVector<SDNode *, 8> Built;
28424   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
28425     for (SDNode *N : Built)
28426       AddToWorklist(N);
28427     return S;
28428   }
28429 
28430   return SDValue();
28431 }
28432 
28433 /// Given an ISD::SREM node expressing a remainder by constant power of 2,
28434 /// return a DAG expression that will generate the same value.
28435 SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
28436   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28437   if (!C)
28438     return SDValue();
28439 
28440   // Avoid division by zero.
28441   if (C->isZero())
28442     return SDValue();
28443 
28444   SmallVector<SDNode *, 8> Built;
28445   if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
28446     for (SDNode *N : Built)
28447       AddToWorklist(N);
28448     return S;
28449   }
28450 
28451   return SDValue();
28452 }
28453 
28454 // This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
28455 //
28456 // Returns the node that represents `Log2(Op)`. This may create a new node. If
28457 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
28458 //
28459 // All nodes will be created at `DL` and the output will be of type `VT`.
28460 //
28461 // This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
28462 // `AssumeNonZero` if this function should simply assume (rather than prove)
28463 // that `Op` is non-zero.
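      // Illustrative folds: Log2(splat 8) -> splat 3, Log2(1 << Y) -> Y, and
      // Log2(select C, 16, 32) -> select C, 4, 5.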
28464 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
28465                                    SDValue Op, unsigned Depth,
28466                                    bool AssumeNonZero) {
28467   assert(VT.isInteger() && "Only integer types are supported!");
28468 
28469   auto PeekThroughCastsAndTrunc = [](SDValue V) {
28470     while (true) {
28471       switch (V.getOpcode()) {
28472       case ISD::TRUNCATE:
28473       case ISD::ZERO_EXTEND:
28474         V = V.getOperand(0);
28475         break;
28476       default:
28477         return V;
28478       }
28479     }
28480   };
28481 
28482   if (VT.isScalableVector())
28483     return SDValue();
28484 
28485   Op = PeekThroughCastsAndTrunc(Op);
28486 
28487   // Helper for determining whether a value is a power-of-2 constant scalar or a
28488   // vector of such elements.
28489   SmallVector<APInt> Pow2Constants;
28490   auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
28491     if (C->isZero() || C->isOpaque())
28492       return false;
28493     // TODO: We may also be able to support negative powers of 2 here.
28494     if (C->getAPIntValue().isPowerOf2()) {
28495       Pow2Constants.emplace_back(C->getAPIntValue());
28496       return true;
28497     }
28498     return false;
28499   };
28500 
28501   if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
28502     if (!VT.isVector())
28503       return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
28504     // We need to create a build vector
28505     if (Op.getOpcode() == ISD::SPLAT_VECTOR)
28506       return DAG.getSplat(VT, DL,
28507                           DAG.getConstant(Pow2Constants.back().logBase2(), DL,
28508                                           VT.getScalarType()));
28509     SmallVector<SDValue> Log2Ops;
28510     for (const APInt &Pow2 : Pow2Constants)
28511       Log2Ops.emplace_back(
28512           DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
28513     return DAG.getBuildVector(VT, DL, Log2Ops);
28514   }
28515 
28516   if (Depth >= DAG.MaxRecursionDepth)
28517     return SDValue();
28518 
28519   auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
28520     ToCast = PeekThroughCastsAndTrunc(ToCast);
28521     EVT CurVT = ToCast.getValueType();
28522     if (NewVT == CurVT)
28523       return ToCast;
28524 
28525     if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
28526       return DAG.getBitcast(NewVT, ToCast);
28527 
28528     return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
28529   };
28530 
28531   // log2(X << Y) -> log2(X) + Y
28532   if (Op.getOpcode() == ISD::SHL) {
28533     // Both (1 << Y) and (X << Y) with nuw/nsw flags are known non-zero.
28534     if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
28535         Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
28536       if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
28537                                              Depth + 1, AssumeNonZero))
28538         return DAG.getNode(ISD::ADD, DL, VT, LogX,
28539                            CastToVT(VT, Op.getOperand(1)));
28540   }
28541 
28542   // c ? X : Y -> c ? Log2(X) : Log2(Y)
28543   if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
28544       Op.hasOneUse()) {
28545     if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
28546                                            Depth + 1, AssumeNonZero))
28547       if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
28548                                              Depth + 1, AssumeNonZero))
28549         return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
28550   }
28551 
28552   // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
28553   // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
28554   if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
28555       Op.hasOneUse()) {
28556     // Use AssumeNonZero as false here. Otherwise we can hit a case where
28557     // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
28558     if (SDValue LogX =
28559             takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
28560                                 /*AssumeNonZero*/ false))
28561       if (SDValue LogY =
28562               takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
28563                                   /*AssumeNonZero*/ false))
28564         return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
28565   }
28566 
28567   return SDValue();
28568 }
28569 
28570 /// Determines the LogBase2 value for a non-null input value using the
28571 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
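      /// For example (illustrative, i32): for V = 16, ctlz(V) = 27, so
      /// LogBase2(V) = 31 - 27 = 4.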
28572 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
28573                                    bool KnownNonZero, bool InexpensiveOnly,
28574                                    std::optional<EVT> OutVT) {
28575   EVT VT = OutVT ? *OutVT : V.getValueType();
28576   SDValue InexpensiveLogBase2 =
28577       takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
28578   if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
28579     return InexpensiveLogBase2;
28580 
28581   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
28582   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
28583   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
28584   return LogBase2;
28585 }
28586 
28587 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28588 /// For the reciprocal, we need to find the zero of the function:
28589 ///   F(X) = 1/X - A [which has a zero at X = 1/A]
28590 ///     =>
28591 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
28592 ///     does not require additional intermediate precision]
28593 /// For the last iteration, put numerator N into it to gain more precision:
28594 ///   Result = N X_i + X_i (N - N A X_i)
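      /// Convergence sketch (illustrative): for A = 3 with initial estimate
      /// X_0 = 0.3, one step gives X_1 = 0.3 (2 - 3 * 0.3) = 0.33 and the next
      /// gives X_2 = 0.33 (2 - 3 * 0.33) = 0.3333, roughly doubling the number
      /// of correct digits per iteration.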
28595 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
28596                                       SDNodeFlags Flags) {
28597   if (LegalDAG)
28598     return SDValue();
28599 
28600   // TODO: Handle extended types?
28601   EVT VT = Op.getValueType();
28602   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28603       VT.getScalarType() != MVT::f64)
28604     return SDValue();
28605 
28606   // If estimates are explicitly disabled for this function, we're done.
28607   MachineFunction &MF = DAG.getMachineFunction();
28608   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
28609   if (Enabled == TLI.ReciprocalEstimate::Disabled)
28610     return SDValue();
28611 
28612   // Estimates may be explicitly enabled for this type with a custom number of
28613   // refinement steps.
28614   int Iterations = TLI.getDivRefinementSteps(VT, MF);
28615   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
28616     AddToWorklist(Est.getNode());
28617 
28618     SDLoc DL(Op);
28619     if (Iterations) {
28620       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
28621 
28622       // Newton iterations: Est = Est + Est (N - Arg * Est)
28623       // If this is the last iteration, also multiply by the numerator.
28624       for (int i = 0; i < Iterations; ++i) {
28625         SDValue MulEst = Est;
28626 
28627         if (i == Iterations - 1) {
28628           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
28629           AddToWorklist(MulEst.getNode());
28630         }
28631 
28632         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
28633         AddToWorklist(NewEst.getNode());
28634 
28635         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
28636                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
28637         AddToWorklist(NewEst.getNode());
28638 
28639         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28640         AddToWorklist(NewEst.getNode());
28641 
28642         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
28643         AddToWorklist(Est.getNode());
28644       }
28645     } else {
28646       // If no iterations are available, multiply with N.
28647       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
28648       AddToWorklist(Est.getNode());
28649     }
28650 
28651     return Est;
28652   }
28653 
28654   return SDValue();
28655 }
28656 
28657 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28658 /// For the reciprocal sqrt, we need to find the zero of the function:
28659 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28660 ///     =>
28661 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28662 /// As a result, we precompute A/2 prior to the iteration loop.
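      /// (Derivation sketch: F'(X) = -2 / X^3, so the Newton step is
      ///   X_{i+1} = X_i - (1/X_i^2 - A) / (-2/X_i^3) = X_i (1.5 - A/2 X_i^2),
      /// which is exactly the update computed in the loop below.)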
28663 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
28664                                          unsigned Iterations,
28665                                          SDNodeFlags Flags, bool Reciprocal) {
28666   EVT VT = Arg.getValueType();
28667   SDLoc DL(Arg);
28668   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
28669 
28670   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
28671   // this entire sequence requires only one FP constant.
28672   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
28673   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
28674 
28675   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28676   for (unsigned i = 0; i < Iterations; ++i) {
28677     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
28678     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
28679     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28680     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28681   }
28682 
28683   // If non-reciprocal square root is requested, multiply the result by Arg.
28684   if (!Reciprocal)
28685     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28686 
28687   return Est;
28688 }
28689 
28690 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28691 /// For the reciprocal sqrt, we need to find the zero of the function:
28692 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28693 ///     =>
28694 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
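      /// (Algebraically this is the same step as the one-constant form, since
      ///   (-0.5 X_i) (A X_i^2 - 3.0) = X_i (1.5 - A/2 X_i^2),
      /// re-associated to use the constants -0.5 and -3.0 rather than 1.5 and a
      /// precomputed A/2.)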
28695 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28696                                          unsigned Iterations,
28697                                          SDNodeFlags Flags, bool Reciprocal) {
28698   EVT VT = Arg.getValueType();
28699   SDLoc DL(Arg);
28700   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28701   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28702 
28703   // This routine must enter the loop below to work correctly
28704   // when (Reciprocal == false).
28705   assert(Iterations > 0);
28706 
28707   // Newton iterations for reciprocal square root:
28708   // E = (E * -0.5) * ((A * E) * E + -3.0)
28709   for (unsigned i = 0; i < Iterations; ++i) {
28710     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28711     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28712     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28713 
28714     // When calculating a square root at the last iteration build:
28715     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28716     // (notice a common subexpression)
28717     SDValue LHS;
28718     if (Reciprocal || (i + 1) < Iterations) {
28719       // RSQRT: LHS = (E * -0.5)
28720       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28721     } else {
28722       // SQRT: LHS = (A * E) * -0.5
28723       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28724     }
28725 
28726     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28727   }
28728 
28729   return Est;
28730 }
28731 
28732 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28733 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
28734 /// Op can be zero.
28735 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28736                                            bool Reciprocal) {
28737   if (LegalDAG)
28738     return SDValue();
28739 
28740   // TODO: Handle extended types?
28741   EVT VT = Op.getValueType();
28742   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28743       VT.getScalarType() != MVT::f64)
28744     return SDValue();
28745 
28746   // If estimates are explicitly disabled for this function, we're done.
28747   MachineFunction &MF = DAG.getMachineFunction();
28748   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28749   if (Enabled == TLI.ReciprocalEstimate::Disabled)
28750     return SDValue();
28751 
28752   // Estimates may be explicitly enabled for this type with a custom number of
28753   // refinement steps.
28754   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28755 
28756   bool UseOneConstNR = false;
28757   if (SDValue Est =
28758       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28759                           Reciprocal)) {
28760     AddToWorklist(Est.getNode());
28761 
28762     if (Iterations > 0)
28763       Est = UseOneConstNR
28764             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28765             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28766     if (!Reciprocal) {
28767       SDLoc DL(Op);
28768       // Try the target specific test first.
28769       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28770 
28771       // The estimate is now completely wrong if the input was exactly 0.0 or
28772       // possibly a denormal. Force the answer to 0.0 or the value provided
28773       // by the target for those cases.
28774       Est = DAG.getNode(
28775           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28776           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28777     }
28778     return Est;
28779   }
28780 
28781   return SDValue();
28782 }
28783 
28784 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28785   return buildSqrtEstimateImpl(Op, Flags, true);
28786 }
28787 
28788 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28789   return buildSqrtEstimateImpl(Op, Flags, false);
28790 }
28791 
28792 /// Return true if there is any possibility that the two addresses overlap.
28793 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28794 
28795   struct MemUseCharacteristics {
28796     bool IsVolatile;
28797     bool IsAtomic;
28798     SDValue BasePtr;
28799     int64_t Offset;
28800     LocationSize NumBytes;
28801     MachineMemOperand *MMO;
28802   };
28803 
28804   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28805     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28806       int64_t Offset = 0;
28807       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28808         Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28809                  : (LSN->getAddressingMode() == ISD::PRE_DEC)
28810                      ? -1 * C->getSExtValue()
28811                      : 0;
28812       TypeSize Size = LSN->getMemoryVT().getStoreSize();
28813       return {LSN->isVolatile(),           LSN->isAtomic(),
28814               LSN->getBasePtr(),           Offset /*base offset*/,
28815               LocationSize::precise(Size), LSN->getMemOperand()};
28816     }
28817     if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
28818       return {false /*isVolatile*/,
28819               /*isAtomic*/ false,
28820               LN->getOperand(1),
28821               (LN->hasOffset()) ? LN->getOffset() : 0,
28822               (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28823                                 : LocationSize::beforeOrAfterPointer(),
28824               (MachineMemOperand *)nullptr};
28825     // Default.
28826     return {false /*isvolatile*/,
28827             /*isAtomic*/ false,
28828             SDValue(),
28829             (int64_t)0 /*offset*/,
28830             LocationSize::beforeOrAfterPointer() /*size*/,
28831             (MachineMemOperand *)nullptr};
28832   };
28833 
28834   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28835                         MUC1 = getCharacteristics(Op1);
28836 
28837   // If they are to the same address, then they must be aliases.
28838   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28839       MUC0.Offset == MUC1.Offset)
28840     return true;
28841 
28842   // If they are both volatile then they cannot be reordered.
28843   if (MUC0.IsVolatile && MUC1.IsVolatile)
28844     return true;
28845 
28846   // Be conservative about atomics for the moment
28847   // TODO: This is way overconservative for unordered atomics (see D66309)
28848   if (MUC0.IsAtomic && MUC1.IsAtomic)
28849     return true;
28850 
28851   if (MUC0.MMO && MUC1.MMO) {
28852     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28853         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28854       return false;
28855   }
28856 
28857   // If NumBytes is scalable and the offset is not 0, conservatively return
28858   // may-alias.
28859   if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28860        MUC0.Offset != 0) ||
28861       (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28862        MUC1.Offset != 0))
28863     return true;
28864   // Try to prove that there is aliasing, or that there is no aliasing. Either
28865   // way, we can return now. If nothing can be proved, proceed with more tests.
28866   bool IsAlias;
28867   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28868                                        DAG, IsAlias))
28869     return IsAlias;
28870 
28871   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28872   // either are not known.
28873   if (!MUC0.MMO || !MUC1.MMO)
28874     return true;
28875 
28876   // If one operation reads from invariant memory, and the other may store, they
28877   // cannot alias. These should really be checking the equivalent of mayWrite,
28878   // but it only matters for memory nodes other than load/store.
28879   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28880       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28881     return false;
28882 
28883   // If the required SrcValue1 and SrcValue2 are known to have relatively
28884   // large alignment compared to the size and offset of the access, we may be
28885   // able to prove they do not alias. This check is conservative for now to
28886   // catch cases created by splitting vector types; it only works when the
28887   // offsets are multiples of the size of the data.
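        // Worked example (illustrative): two 4-byte accesses from bases aligned
        // to 16 bytes at offsets 0 and 4 give OffAlign0 = 0 and OffAlign1 = 4;
        // since 0 + 4 <= 4, the accesses cannot overlap and we report no-alias.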
28888   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28889   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28890   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28891   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28892   LocationSize Size0 = MUC0.NumBytes;
28893   LocationSize Size1 = MUC1.NumBytes;
28894 
28895   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28896       Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28897       !Size1.isScalable() && Size0 == Size1 &&
28898       OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28899       SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28900       SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28901     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28902     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28903 
28904     // There is no overlap between these relatively aligned accesses of
28905     // similar size. Return no alias.
28906     if ((OffAlign0 + static_cast<int64_t>(
28907                          Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28908         (OffAlign1 + static_cast<int64_t>(
28909                          Size1.getValue().getKnownMinValue())) <= OffAlign0)
28910       return false;
28911   }
28912 
28913   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28914                    ? CombinerGlobalAA
28915                    : DAG.getSubtarget().useAA();
28916 #ifndef NDEBUG
28917   if (CombinerAAOnlyFunc.getNumOccurrences() &&
28918       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28919     UseAA = false;
28920 #endif
28921 
28922   if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28923       Size0.hasValue() && Size1.hasValue() &&
28924       // Can't represent a scalable size + fixed offset in LocationSize
28925       (!Size0.isScalable() || SrcValOffset0 == 0) &&
28926       (!Size1.isScalable() || SrcValOffset1 == 0)) {
28927     // Use alias analysis information.
28928     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28929     int64_t Overlap0 =
28930         Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28931     int64_t Overlap1 =
28932         Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28933     LocationSize Loc0 =
28934         Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28935     LocationSize Loc1 =
28936         Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28937     if (BatchAA->isNoAlias(
28938             MemoryLocation(MUC0.MMO->getValue(), Loc0,
28939                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28940             MemoryLocation(MUC1.MMO->getValue(), Loc1,
28941                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28942       return false;
28943   }
28944 
28945   // Otherwise we have to assume they alias.
28946   return true;
28947 }
28948 
28949 /// Walk up chain skipping non-aliasing memory nodes,
28950 /// looking for aliasing nodes and adding them to the Aliases vector.
28951 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28952                                    SmallVectorImpl<SDValue> &Aliases) {
28953   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
28954   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
28955 
28956   // Get alias information for node.
28957   // TODO: relax aliasing for unordered atomics (see D66309)
28958   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28959 
28960   // Starting off.
28961   Chains.push_back(OriginalChain);
28962   unsigned Depth = 0;
28963 
28964   // Attempt to improve chain by a single step
  auto ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      // TODO: Relax aliasing for unordered atomics (see D66309)
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      cast<LSBaseSDNode>(C.getNode())->isSimple();
      if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!mayAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      return false;
    }
  };

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE).
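      // E.g., for TokenFactor(A, B, C) the loop below pushes C, then B, then
      // A, so A is popped and visited first, preserving operand order.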
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }
    // Everything else
    if (ImproveChain(Chain)) {
      // Updated chain found; consider the new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      ++Depth;
      continue;
    }
    // No improved chain is possible, so treat this chain as an alias.
    Aliases.push_back(Chain);
  }
}

/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node).
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  if (OptLevel == CodeGenOptLevel::None)
    return OldChain;

  // Ops for replacing token factor.
  SmallVector<SDValue, 8> Aliases;

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.empty())
    return DAG.getEntryNode();

  // If a single operand then chain to it.  We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getTokenFactor(SDLoc(N), Aliases);
}

// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because mergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running mergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
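//
// For example (illustrative only): given chained i16 and i32 stores to
// adjacent offsets, improving only one store's chain at a time could leave
// the remaining stores chained behind one another, where a later merge pass
// would not see them all as candidates at once.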

bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the address range immediately
  // adjacent to the one already covered, and is thus merged with the previous
  // interval at insertion time.

  using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
                                 IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);
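  // Because the mapped value is std::monostate and the intervals are
  // half-open, adjacent ranges coalesce on insertion: e.g., inserting
  // [-4, 0) when [0, 4) is already present yields the single interval
  // [-4, 4).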

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Do not handle stores to opaque types.
  if (St->getMemoryVT().isZeroSized())
    return false;

  // BaseIndexOffset assumes that offsets are fixed-size, which
  // is not valid for scalable vectors where the offsets are
  // scaled by `vscale`, so bail out early.
  if (St->getMemoryVT().isScalableVT())
    return false;

  // Add ST's interval.
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
                   std::monostate{});
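  // The byte size rounds up: e.g., an i24 store covers (24 + 7) / 8 = 3
  // bytes, so its interval is [0, 3).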

  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    if (Chain->getMemoryVT().isScalableVector())
      return false;

    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    // TODO: Relax for unordered atomics (see D66309)
    if (!Chain->isSimple() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
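    // E.g., if [0, 4) is already covered, a 4-byte store at Offset = 2 fails
    // the first check below ([2, 6) would overlap [0, 4)), while one at
    // Offset = -4 inserts [-4, 0), which coalesces with [0, 4).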
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, std::monostate{});

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.empty())
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended, and return a single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChain.
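  // E.g., if every store in TFOps was rechained past NewChain, nothing in the
  // TokenFactor would order after NewChain, so NewChain is prepended below to
  // preserve that dependence for St's chain successors.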
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  // Add TF and its operands to the worklist.
  AddToWorklist(TF.getNode());
  for (const SDValue &Op : TF->ops())
    AddToWorklist(Op.getNode());
  AddToWorklist(STChain);
  return true;
}

bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOptLevel::None)
    return false;

  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Directly improve a chain of disjoint stores starting at St.
  if (parallelizeChainedStores(St))
    return true;

  // Improve St's chain.
  SDValue BetterChain = FindBetterChain(St, St->getChain());
  if (St->getChain() != BetterChain) {
    replaceStoreChain(St, BetterChain);
    return true;
  }
  return false;
}

/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
                           CodeGenOptLevel OptLevel) {
  // Construct the combiner and run it over the DAG at the given combine level.
  DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
}