1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
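//
// A few illustrative examples of the kinds of folds performed here (these
// mirror combines implemented later in this file):
//   (add x, 0)            -> x
//   (add (add x, c1), c2) -> (add x, c1+c2)
//   (trunc (and X, C))    -> (and (trunc X), (trunc C))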
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/MemoryLocation.h"
33 #include "llvm/Analysis/VectorUtils.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/SelectionDAG.h"
41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42 #include "llvm/CodeGen/SelectionDAGNodes.h"
43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined   , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed     , "Number of load/op/store sequences narrowed");
84 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of loads sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94         cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99                    cl::desc("Only use DAG-combiner alias analysis in this"
100                             " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107                   cl::desc("Bypass the profitability model of load slicing"),
108                   cl::init(false));
109 
110 static cl::opt<bool>
111   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112                     cl::desc("DAG combiner may split indexing from loads"));
113 
114 static cl::opt<bool>
115     EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116                        cl::desc("DAG combiner enable merging multiple stores "
117                                 "into a wider store"));
118 
119 static cl::opt<unsigned> TokenFactorInlineLimit(
120     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121     cl::desc("Limit the number of operands to inline for Token Factors"));
122 
123 static cl::opt<unsigned> StoreMergeDependenceLimit(
124     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125     cl::desc("Limit the number of times for the same StoreNode and RootNode "
126              "to bail out in store merging dependence check"));
127 
128 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
129     "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
130     cl::desc("DAG cominber enable reducing the width of load/op/store "
131              "sequence"));
132 
133 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
134     "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
135     cl::desc("DAG cominber enable load/<replace bytes>/store with "
136              "a narrower store"));
137 
138 namespace {
139 
140   class DAGCombiner {
141     SelectionDAG &DAG;
142     const TargetLowering &TLI;
143     const SelectionDAGTargetInfo *STI;
144     CombineLevel Level;
145     CodeGenOpt::Level OptLevel;
146     bool LegalDAG = false;
147     bool LegalOperations = false;
148     bool LegalTypes = false;
149     bool ForCodeSize;
150     bool DisableGenericCombines;
151 
152     /// Worklist of all of the nodes that need to be simplified.
153     ///
154     /// This must behave as a stack -- new nodes to process are pushed onto the
155     /// back and when processing we pop off of the back.
156     ///
157     /// The worklist will not contain duplicates but may contain null entries
158     /// due to nodes being deleted from the underlying DAG.
159     SmallVector<SDNode *, 64> Worklist;
160 
161     /// Mapping from an SDNode to its position on the worklist.
162     ///
163     /// This is used to find and remove nodes from the worklist (by nulling
164     /// them) when they are deleted from the underlying DAG. It relies on
165     /// stable indices of nodes within the worklist.
166     DenseMap<SDNode *, unsigned> WorklistMap;
167     /// This records all nodes that we have attempted to add to the worklist
168     /// since we last considered a new worklist entry. Since we do not add
169     /// duplicate nodes to the worklist, this is different from its tail.
170     SmallSetVector<SDNode *, 32> PruningList;
171 
172     /// Set of nodes which have been combined (at least once).
173     ///
174     /// This is used to allow us to reliably add any operands of a DAG node
175     /// which have not yet been combined to the worklist.
176     SmallPtrSet<SDNode *, 32> CombinedNodes;
177 
178     /// Map from candidate StoreNode to the pair of RootNode and count.
179     /// The count tracks how many times we have seen the StoreNode with the
180     /// same RootNode bail out in the dependence check. Once the same pair has
181     /// bailed out more times than a set limit, we no longer consider the
182     /// StoreNode with that RootNode as a store merging
183     /// candidate.
184     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
185 
186     // AA - Used for DAG load/store alias analysis.
187     AliasAnalysis *AA;
188 
189     /// When an instruction is simplified, add all users of the instruction to
190     /// the work lists because they might get more simplified now.
191     void AddUsersToWorklist(SDNode *N) {
192       for (SDNode *Node : N->uses())
193         AddToWorklist(Node);
194     }
195 
196     /// Convenient shorthand to add a node and all of its users to the worklist.
197     void AddToWorklistWithUsers(SDNode *N) {
198       AddUsersToWorklist(N);
199       AddToWorklist(N);
200     }
201 
202     // Prune potentially dangling nodes. This is called after
203     // any visit to a node, but should also be called during a visit after any
204     // failed combine which may have created a DAG node.
205     void clearAddedDanglingWorklistEntries() {
206       // Check any nodes added to the worklist to see if they are prunable.
207       while (!PruningList.empty()) {
208         auto *N = PruningList.pop_back_val();
209         if (N->use_empty())
210           recursivelyDeleteUnusedNodes(N);
211       }
212     }
213 
214     SDNode *getNextWorklistEntry() {
215       // Before we do any work, remove nodes that are not in use.
216       clearAddedDanglingWorklistEntries();
217       SDNode *N = nullptr;
218       // The Worklist holds the SDNodes in order, but it may contain null
219       // entries.
220       while (!N && !Worklist.empty()) {
221         N = Worklist.pop_back_val();
222       }
223 
224       if (N) {
225         bool GoodWorklistEntry = WorklistMap.erase(N);
226         (void)GoodWorklistEntry;
227         assert(GoodWorklistEntry &&
228                "Found a worklist entry without a corresponding map entry!");
229       }
230       return N;
231     }
232 
233     /// Call the node-specific routine that folds each particular type of node.
234     SDValue visit(SDNode *N);
235 
236   public:
237     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
238         : DAG(D), TLI(D.getTargetLoweringInfo()),
239           STI(D.getSubtarget().getSelectionDAGInfo()),
240           Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
241       ForCodeSize = DAG.shouldOptForSize();
242       DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
243 
244       MaximumLegalStoreInBits = 0;
245       // We use the minimum store size here, since that's all we can guarantee
246       // for the scalable vector types.
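      // (For example, the known minimum size of <vscale x 4 x i32> is 128
      // bits, the least that any store of that type is guaranteed to write.)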
247       for (MVT VT : MVT::all_valuetypes())
248         if (EVT(VT).isSimple() && VT != MVT::Other &&
249             TLI.isTypeLegal(EVT(VT)) &&
250             VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
251           MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
252     }
253 
254     void ConsiderForPruning(SDNode *N) {
255       // Mark this for potential pruning.
256       PruningList.insert(N);
257     }
258 
259     /// Add to the worklist, making sure its instance is at the back (next to
260     /// be processed).
261     void AddToWorklist(SDNode *N) {
262       assert(N->getOpcode() != ISD::DELETED_NODE &&
263              "Deleted Node added to Worklist");
264 
265       // Skip handle nodes as they can't usefully be combined and confuse the
266       // zero-use deletion strategy.
267       if (N->getOpcode() == ISD::HANDLENODE)
268         return;
269 
270       ConsiderForPruning(N);
271 
272       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
273         Worklist.push_back(N);
274     }
275 
276     /// Remove all instances of N from the worklist.
277     void removeFromWorklist(SDNode *N) {
278       CombinedNodes.erase(N);
279       PruningList.remove(N);
280       StoreRootCountMap.erase(N);
281 
282       auto It = WorklistMap.find(N);
283       if (It == WorklistMap.end())
284         return; // Not in the worklist.
285 
286       // Null out the entry rather than erasing it to avoid a linear operation.
287       Worklist[It->second] = nullptr;
288       WorklistMap.erase(It);
289     }
290 
291     void deleteAndRecombine(SDNode *N);
292     bool recursivelyDeleteUnusedNodes(SDNode *N);
293 
294     /// Replaces all uses of the results of one DAG node with new values.
295     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
296                       bool AddTo = true);
297 
298     /// Replaces all uses of the results of one DAG node with new values.
299     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
300       return CombineTo(N, &Res, 1, AddTo);
301     }
302 
303     /// Replaces all uses of the results of one DAG node with new values.
304     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
305                       bool AddTo = true) {
306       SDValue To[] = { Res0, Res1 };
307       return CombineTo(N, To, 2, AddTo);
308     }
309 
310     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
311 
312   private:
313     unsigned MaximumLegalStoreInBits;
314 
315     /// Check the specified integer node value to see if it can be simplified or
316     /// if things it uses can be simplified by bit propagation.
317     /// If so, return true.
318     bool SimplifyDemandedBits(SDValue Op) {
319       unsigned BitWidth = Op.getScalarValueSizeInBits();
320       APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
321       return SimplifyDemandedBits(Op, DemandedBits);
322     }
323 
324     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
325       TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
326       KnownBits Known;
327       if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
328         return false;
329 
330       // Revisit the node.
331       AddToWorklist(Op.getNode());
332 
333       CommitTargetLoweringOpt(TLO);
334       return true;
335     }
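    // For example, if Op is (and x, 0xFF), only the low 8 bits of x are
    // demanded, so a more complex computation feeding the high bits of x can
    // be simplified away. (Illustrative; the actual folds live in
    // TargetLowering::SimplifyDemandedBits.)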
336 
337     /// Check the specified vector node value to see if it can be simplified or
338     /// if things it uses can be simplified as it only uses some of the
339     /// elements. If so, return true.
340     bool SimplifyDemandedVectorElts(SDValue Op) {
341       // TODO: For now just pretend it cannot be simplified.
342       if (Op.getValueType().isScalableVector())
343         return false;
344 
345       unsigned NumElts = Op.getValueType().getVectorNumElements();
346       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
347       return SimplifyDemandedVectorElts(Op, DemandedElts);
348     }
349 
350     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
351                               const APInt &DemandedElts,
352                               bool AssumeSingleUse = false);
353     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
354                                     bool AssumeSingleUse = false);
355 
356     bool CombineToPreIndexedLoadStore(SDNode *N);
357     bool CombineToPostIndexedLoadStore(SDNode *N);
358     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
359     bool SliceUpLoad(SDNode *N);
360 
361     // Scalars have size 0 to distinguish from singleton vectors.
362     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
363     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
364     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
365 
366     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
367     ///   load.
368     ///
369     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
370     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
371     /// \param EltNo index of the vector element to load.
372     /// \param OriginalLoad load that EVE came from to be replaced.
373     /// \returns EVE on success, SDValue() on failure.
374     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
375                                          SDValue EltNo,
376                                          LoadSDNode *OriginalLoad);
377     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
378     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
379     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
380     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
381     SDValue PromoteIntBinOp(SDValue Op);
382     SDValue PromoteIntShiftOp(SDValue Op);
383     SDValue PromoteExtend(SDValue Op);
384     bool PromoteLoad(SDValue Op);
385 
386     /// Call the node-specific routine that knows how to fold each
387     /// particular type of node. If that doesn't do anything, try the
388     /// target-specific DAG combines.
389     SDValue combine(SDNode *N);
390 
391     // Visitation implementation - Implement dag node combining for different
392     // node types.  The semantics are as follows:
393     // Return Value:
394     //   SDValue.getNode() == 0 - No change was made
395     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
396     //   otherwise              - N should be replaced by the returned Operand.
397     //
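    // Illustrative sketch (not an actual visitor): a visitor folding
    // (sub x, x) -> 0 would return DAG.getConstant(0, DL, VT) so the caller
    // replaces N; it would return SDValue() when no fold applies; and after
    // CombineTo(N, ...) it would return SDValue(N, 0) to mark N as handled.
    //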
398     SDValue visitTokenFactor(SDNode *N);
399     SDValue visitMERGE_VALUES(SDNode *N);
400     SDValue visitADD(SDNode *N);
401     SDValue visitADDLike(SDNode *N);
402     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
403     SDValue visitSUB(SDNode *N);
404     SDValue visitADDSAT(SDNode *N);
405     SDValue visitSUBSAT(SDNode *N);
406     SDValue visitADDC(SDNode *N);
407     SDValue visitADDO(SDNode *N);
408     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
409     SDValue visitSUBC(SDNode *N);
410     SDValue visitSUBO(SDNode *N);
411     SDValue visitADDE(SDNode *N);
412     SDValue visitADDCARRY(SDNode *N);
413     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
414     SDValue visitSUBE(SDNode *N);
415     SDValue visitSUBCARRY(SDNode *N);
416     SDValue visitMUL(SDNode *N);
417     SDValue visitMULFIX(SDNode *N);
418     SDValue useDivRem(SDNode *N);
419     SDValue visitSDIV(SDNode *N);
420     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
421     SDValue visitUDIV(SDNode *N);
422     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
423     SDValue visitREM(SDNode *N);
424     SDValue visitMULHU(SDNode *N);
425     SDValue visitMULHS(SDNode *N);
426     SDValue visitSMUL_LOHI(SDNode *N);
427     SDValue visitUMUL_LOHI(SDNode *N);
428     SDValue visitMULO(SDNode *N);
429     SDValue visitIMINMAX(SDNode *N);
430     SDValue visitAND(SDNode *N);
431     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
432     SDValue visitOR(SDNode *N);
433     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
434     SDValue visitXOR(SDNode *N);
435     SDValue SimplifyVBinOp(SDNode *N);
436     SDValue visitSHL(SDNode *N);
437     SDValue visitSRA(SDNode *N);
438     SDValue visitSRL(SDNode *N);
439     SDValue visitFunnelShift(SDNode *N);
440     SDValue visitRotate(SDNode *N);
441     SDValue visitABS(SDNode *N);
442     SDValue visitBSWAP(SDNode *N);
443     SDValue visitBITREVERSE(SDNode *N);
444     SDValue visitCTLZ(SDNode *N);
445     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
446     SDValue visitCTTZ(SDNode *N);
447     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
448     SDValue visitCTPOP(SDNode *N);
449     SDValue visitSELECT(SDNode *N);
450     SDValue visitVSELECT(SDNode *N);
451     SDValue visitSELECT_CC(SDNode *N);
452     SDValue visitSETCC(SDNode *N);
453     SDValue visitSETCCCARRY(SDNode *N);
454     SDValue visitSIGN_EXTEND(SDNode *N);
455     SDValue visitZERO_EXTEND(SDNode *N);
456     SDValue visitANY_EXTEND(SDNode *N);
457     SDValue visitAssertExt(SDNode *N);
458     SDValue visitAssertAlign(SDNode *N);
459     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
460     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
461     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
462     SDValue visitTRUNCATE(SDNode *N);
463     SDValue visitBITCAST(SDNode *N);
464     SDValue visitFREEZE(SDNode *N);
465     SDValue visitBUILD_PAIR(SDNode *N);
466     SDValue visitFADD(SDNode *N);
467     SDValue visitFSUB(SDNode *N);
468     SDValue visitFMUL(SDNode *N);
469     SDValue visitFMA(SDNode *N);
470     SDValue visitFDIV(SDNode *N);
471     SDValue visitFREM(SDNode *N);
472     SDValue visitFSQRT(SDNode *N);
473     SDValue visitFCOPYSIGN(SDNode *N);
474     SDValue visitFPOW(SDNode *N);
475     SDValue visitSINT_TO_FP(SDNode *N);
476     SDValue visitUINT_TO_FP(SDNode *N);
477     SDValue visitFP_TO_SINT(SDNode *N);
478     SDValue visitFP_TO_UINT(SDNode *N);
479     SDValue visitFP_ROUND(SDNode *N);
480     SDValue visitFP_EXTEND(SDNode *N);
481     SDValue visitFNEG(SDNode *N);
482     SDValue visitFABS(SDNode *N);
483     SDValue visitFCEIL(SDNode *N);
484     SDValue visitFTRUNC(SDNode *N);
485     SDValue visitFFLOOR(SDNode *N);
486     SDValue visitFMINNUM(SDNode *N);
487     SDValue visitFMAXNUM(SDNode *N);
488     SDValue visitFMINIMUM(SDNode *N);
489     SDValue visitFMAXIMUM(SDNode *N);
490     SDValue visitBRCOND(SDNode *N);
491     SDValue visitBR_CC(SDNode *N);
492     SDValue visitLOAD(SDNode *N);
493 
494     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
495     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
496 
497     SDValue visitSTORE(SDNode *N);
498     SDValue visitLIFETIME_END(SDNode *N);
499     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
500     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
501     SDValue visitBUILD_VECTOR(SDNode *N);
502     SDValue visitCONCAT_VECTORS(SDNode *N);
503     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
504     SDValue visitVECTOR_SHUFFLE(SDNode *N);
505     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
506     SDValue visitINSERT_SUBVECTOR(SDNode *N);
507     SDValue visitMLOAD(SDNode *N);
508     SDValue visitMSTORE(SDNode *N);
509     SDValue visitMGATHER(SDNode *N);
510     SDValue visitMSCATTER(SDNode *N);
511     SDValue visitFP_TO_FP16(SDNode *N);
512     SDValue visitFP16_TO_FP(SDNode *N);
513     SDValue visitVECREDUCE(SDNode *N);
514 
515     SDValue visitFADDForFMACombine(SDNode *N);
516     SDValue visitFSUBForFMACombine(SDNode *N);
517     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
518 
519     SDValue XformToShuffleWithZero(SDNode *N);
520     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
521                                                     const SDLoc &DL, SDValue N0,
522                                                     SDValue N1);
523     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
524                                       SDValue N1);
525     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
526                            SDValue N1, SDNodeFlags Flags);
527 
528     SDValue visitShiftByConstant(SDNode *N);
529 
530     SDValue foldSelectOfConstants(SDNode *N);
531     SDValue foldVSelectOfConstants(SDNode *N);
532     SDValue foldBinOpIntoSelect(SDNode *BO);
533     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
534     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
535     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
536     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
537                              SDValue N2, SDValue N3, ISD::CondCode CC,
538                              bool NotExtCompare = false);
539     SDValue convertSelectOfFPConstantsToLoadOffset(
540         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
541         ISD::CondCode CC);
542     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
543                                    SDValue N2, SDValue N3, ISD::CondCode CC);
544     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
545                               const SDLoc &DL);
546     SDValue unfoldMaskedMerge(SDNode *N);
547     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
548     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
549                           const SDLoc &DL, bool foldBooleans);
550     SDValue rebuildSetCC(SDValue N);
551 
552     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
553                            SDValue &CC, bool MatchStrict = false) const;
554     bool isOneUseSetCC(SDValue N) const;
555 
556     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
557                                          unsigned HiOp);
558     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
559     SDValue CombineExtLoad(SDNode *N);
560     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
561     SDValue combineRepeatedFPDivisors(SDNode *N);
562     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
563     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
564     SDValue BuildSDIV(SDNode *N);
565     SDValue BuildSDIVPow2(SDNode *N);
566     SDValue BuildUDIV(SDNode *N);
567     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
568     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
569     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
570     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
571     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
572     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
573                                 SDNodeFlags Flags, bool Reciprocal);
574     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
575                                 SDNodeFlags Flags, bool Reciprocal);
576     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
577                                bool DemandHighBits = true);
578     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
579     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
580                               SDValue InnerPos, SDValue InnerNeg,
581                               unsigned PosOpcode, unsigned NegOpcode,
582                               const SDLoc &DL);
583     SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
584                               SDValue InnerPos, SDValue InnerNeg,
585                               unsigned PosOpcode, unsigned NegOpcode,
586                               const SDLoc &DL);
587     SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
588     SDValue MatchLoadCombine(SDNode *N);
589     SDValue MatchStoreCombine(StoreSDNode *N);
590     SDValue ReduceLoadWidth(SDNode *N);
591     SDValue ReduceLoadOpStoreWidth(SDNode *N);
592     SDValue splitMergedValStore(StoreSDNode *ST);
593     SDValue TransformFPLoadStorePair(SDNode *N);
594     SDValue convertBuildVecZextToZext(SDNode *N);
595     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
596     SDValue reduceBuildVecTruncToBitCast(SDNode *N);
597     SDValue reduceBuildVecToShuffle(SDNode *N);
598     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
599                                   ArrayRef<int> VectorMask, SDValue VecIn1,
600                                   SDValue VecIn2, unsigned LeftIdx,
601                                   bool DidSplitVec);
602     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
603 
604     /// Walk up chain skipping non-aliasing memory nodes,
605     /// looking for aliasing nodes and adding them to the Aliases vector.
606     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
607                           SmallVectorImpl<SDValue> &Aliases);
608 
609     /// Return true if there is any possibility that the two addresses overlap.
610     bool isAlias(SDNode *Op0, SDNode *Op1) const;
611 
612     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
613     /// chain (aliasing node).
614     SDValue FindBetterChain(SDNode *N, SDValue Chain);
615 
616     /// Try to replace a store and any possibly adjacent stores on
617     /// consecutive chains with better chains. Return true only if St is
618     /// replaced.
619     ///
620     /// Notice that other chains may still be replaced even if the function
621     /// returns false.
622     bool findBetterNeighborChains(StoreSDNode *St);
623 
624     // Helper for findBetterNeighborChains. Walk up the store chain, adding
625     // additional chained stores that do not overlap and can be parallelized.
626     bool parallelizeChainedStores(StoreSDNode *St);
627 
628     /// Holds a pointer to an LSBaseSDNode as well as information on where it
629     /// is located in a sequence of memory operations connected by a chain.
630     struct MemOpLink {
631       // Ptr to the mem node.
632       LSBaseSDNode *MemNode;
633 
634       // Offset from the base ptr.
635       int64_t OffsetFromBase;
636 
637       MemOpLink(LSBaseSDNode *N, int64_t Offset)
638           : MemNode(N), OffsetFromBase(Offset) {}
639     };
640 
641     // Classify the origin of a stored value.
642     enum class StoreSource { Unknown, Constant, Extract, Load };
643     StoreSource getStoreSource(SDValue StoreVal) {
644       if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
645         return StoreSource::Constant;
646       if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
647           StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
648         return StoreSource::Extract;
649       if (isa<LoadSDNode>(StoreVal))
650         return StoreSource::Load;
651       return StoreSource::Unknown;
652     }
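    // For example, a stored ConstantFP classifies as StoreSource::Constant,
    // while a stored (extract_vector_elt v, i) classifies as
    // StoreSource::Extract.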
653 
654     /// This is a helper function for visitMUL to check the profitability
655     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
656     /// MulNode is the original multiply, AddNode is (add x, c1),
657     /// and ConstNode is c2.
658     bool isMulAddWithConstProfitable(SDNode *MulNode,
659                                      SDValue &AddNode,
660                                      SDValue &ConstNode);
661 
662     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
663     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
664     /// the type of the loaded value to be extended.
665     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
666                           EVT LoadResultTy, EVT &ExtVT);
667 
668     /// Helper function to calculate whether the given Load/Store can have its
669     /// width reduced to ExtVT.
670     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
671                            EVT &MemVT, unsigned ShAmt = 0);
672 
673     /// Used by BackwardsPropagateMask to find suitable loads.
674     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
675                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
676                            ConstantSDNode *Mask, SDNode *&NodeToMask);
677     /// Attempt to propagate a given AND node back to load leaves so that they
678     /// can be combined into narrow loads.
679     bool BackwardsPropagateMask(SDNode *N);
680 
681     /// Helper function for mergeConsecutiveStores which merges the component
682     /// store chains.
683     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
684                                 unsigned NumStores);
685 
686     /// This is a helper function for mergeConsecutiveStores. When the source
687     /// elements of the consecutive stores are all constants or all extracted
688     /// vector elements, try to merge them into one larger store introducing
689     /// bitcasts if necessary.  \return True if a merged store was created.
690     bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
691                                          EVT MemVT, unsigned NumStores,
692                                          bool IsConstantSrc, bool UseVector,
693                                          bool UseTrunc);
694 
695     /// This is a helper function for mergeConsecutiveStores. Stores that
696     /// potentially may be merged with St are placed in StoreNodes. RootNode is
697     /// a chain predecessor to all store candidates.
698     void getStoreMergeCandidates(StoreSDNode *St,
699                                  SmallVectorImpl<MemOpLink> &StoreNodes,
700                                  SDNode *&Root);
701 
702     /// Helper function for mergeConsecutiveStores. Checks if candidate stores
703     /// have indirect dependency through their operands. RootNode is the
704     /// predecessor to all stores calculated by getStoreMergeCandidates and is
705     /// used to prune the dependency check. \return True if safe to merge.
706     bool checkMergeStoreCandidatesForDependencies(
707         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
708         SDNode *RootNode);
709 
710     /// This is a helper function for mergeConsecutiveStores. Given a list of
711     /// store candidates, find the first N that are consecutive in memory.
712     /// Returns 0 if there are not at least 2 consecutive stores to try merging.
713     unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
714                                   int64_t ElementSizeBytes) const;
715 
716     /// This is a helper function for mergeConsecutiveStores. It is used for
717     /// store chains that are composed entirely of constant values.
718     bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
719                                   unsigned NumConsecutiveStores,
720                                   EVT MemVT, SDNode *Root, bool AllowVectors);
721 
722     /// This is a helper function for mergeConsecutiveStores. It is used for
723     /// store chains that are composed entirely of extracted vector elements.
724     /// When extracting multiple vector elements, try to store them in one
725     /// vector store rather than a sequence of scalar stores.
726     bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
727                                  unsigned NumConsecutiveStores, EVT MemVT,
728                                  SDNode *Root);
729 
730     /// This is a helper function for mergeConsecutiveStores. It is used for
731     /// store chains that are composed entirely of loaded values.
732     bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
733                               unsigned NumConsecutiveStores, EVT MemVT,
734                               SDNode *Root, bool AllowVectors,
735                               bool IsNonTemporalStore, bool IsNonTemporalLoad);
736 
737     /// Merge consecutive store operations into a wide store.
738     /// This optimization uses wide integers or vectors when possible.
739     /// \return true if stores were merged.
740     bool mergeConsecutiveStores(StoreSDNode *St);
741 
742     /// Try to transform a truncation where C is a constant:
743     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
744     ///
745     /// \p N needs to be a truncation and its first operand an AND. Other
746     /// requirements are checked by the function (e.g. that trunc is
747     /// single-use) and, if they are not met, an empty SDValue is returned.
748     SDValue distributeTruncateThroughAnd(SDNode *N);
749 
750     /// Helper function to determine whether the target supports operation
751     /// given by \p Opcode for type \p VT, that is, whether the operation
752     /// is legal or custom before legalizing operations, and whether it is
753     /// legal (but not custom) after legalization.
754     bool hasOperation(unsigned Opcode, EVT VT) {
755       if (LegalOperations)
756         return TLI.isOperationLegal(Opcode, VT);
757       return TLI.isOperationLegalOrCustom(Opcode, VT);
758     }
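    // Illustrative use (ISD::ROTL is just an example opcode): before operation
    // legalization, hasOperation(ISD::ROTL, VT) also accepts a Custom
    // lowering, so a rotate may be formed even if the target expands it
    // itself; after legalization only a truly Legal rotate qualifies.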
759 
760   public:
761     /// Runs the DAG combiner on all nodes in the worklist.
762     void Run(CombineLevel AtLevel);
763 
764     SelectionDAG &getDAG() const { return DAG; }
765 
766     /// Returns a type large enough to hold any valid shift amount - before type
767     /// legalization these can be huge.
768     EVT getShiftAmountTy(EVT LHSTy) {
769       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
770       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
771     }
772 
773     /// This method returns true if we are running before type legalization or
774     /// if the specified VT is legal.
775     bool isTypeLegal(const EVT &VT) {
776       if (!LegalTypes) return true;
777       return TLI.isTypeLegal(VT);
778     }
779 
780     /// Convenience wrapper around TargetLowering::getSetCCResultType
781     EVT getSetCCResultType(EVT VT) const {
782       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
783     }
784 
785     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
786                          SDValue OrigLoad, SDValue ExtLoad,
787                          ISD::NodeType ExtType);
788   };
789 
790 /// This class is a DAGUpdateListener that removes any deleted
791 /// nodes from the worklist.
792 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
793   DAGCombiner &DC;
794 
795 public:
796   explicit WorklistRemover(DAGCombiner &dc)
797     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
798 
799   void NodeDeleted(SDNode *N, SDNode *E) override {
800     DC.removeFromWorklist(N);
801   }
802 };
803 
804 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
805   DAGCombiner &DC;
806 
807 public:
808   explicit WorklistInserter(DAGCombiner &dc)
809       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
810 
811   // FIXME: Ideally we could add N to the worklist, but this causes exponential
812   //        compile time costs in large DAGs, e.g. Halide.
813   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
814 };
815 
816 } // end anonymous namespace
817 
818 //===----------------------------------------------------------------------===//
819 //  TargetLowering::DAGCombinerInfo implementation
820 //===----------------------------------------------------------------------===//
821 
822 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
823   ((DAGCombiner*)DC)->AddToWorklist(N);
824 }
825 
826 SDValue TargetLowering::DAGCombinerInfo::
827 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
828   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
829 }
830 
831 SDValue TargetLowering::DAGCombinerInfo::
832 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
833   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
834 }
835 
836 SDValue TargetLowering::DAGCombinerInfo::
837 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
838   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
839 }
840 
841 bool TargetLowering::DAGCombinerInfo::
842 recursivelyDeleteUnusedNodes(SDNode *N) {
843   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
844 }
845 
846 void TargetLowering::DAGCombinerInfo::
847 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
848   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
849 }
850 
851 //===----------------------------------------------------------------------===//
852 // Helper Functions
853 //===----------------------------------------------------------------------===//
854 
855 void DAGCombiner::deleteAndRecombine(SDNode *N) {
856   removeFromWorklist(N);
857 
858   // If the operands of this node are only used by the node, they will now be
859   // dead. Make sure to re-visit them and recursively delete dead nodes.
860   for (const SDValue &Op : N->ops())
861     // For an operand generating multiple values, one of the values may
862     // become dead allowing further simplification (e.g. split index
863     // arithmetic from an indexed load).
864     if (Op->hasOneUse() || Op->getNumValues() > 1)
865       AddToWorklist(Op.getNode());
866 
867   DAG.DeleteNode(N);
868 }
869 
870 // APInts must be the same size for most operations; this helper
871 // function zero extends the shorter of the pair so that they match.
872 // We provide an Offset so that we can create bitwidths that won't overflow.
873 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
874   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
875   LHS = LHS.zextOrSelf(Bits);
876   RHS = RHS.zextOrSelf(Bits);
877 }
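// Usage sketch with hypothetical widths: for an 8-bit LHS and a 16-bit RHS
// with Offset == 1, both are zero extended to 17 bits, leaving one bit of
// headroom so a subsequent operation cannot overflow.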
878 
879 // Return true if this node is a setcc, or is a select_cc
880 // that selects between the target values used for true and false, making it
881 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
882 // the appropriate nodes based on the type of node we are checking. This
883 // simplifies life a bit for the callers.
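// For example, (select_cc lhs, rhs, TrueVal, FalseVal, cc), where TrueVal and
// FalseVal are the target's canonical boolean constants, behaves exactly like
// (setcc lhs, rhs, cc), so callers can handle both forms uniformly.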
884 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
885                                     SDValue &CC, bool MatchStrict) const {
886   if (N.getOpcode() == ISD::SETCC) {
887     LHS = N.getOperand(0);
888     RHS = N.getOperand(1);
889     CC  = N.getOperand(2);
890     return true;
891   }
892 
893   if (MatchStrict &&
894       (N.getOpcode() == ISD::STRICT_FSETCC ||
895        N.getOpcode() == ISD::STRICT_FSETCCS)) {
896     LHS = N.getOperand(1);
897     RHS = N.getOperand(2);
898     CC  = N.getOperand(3);
899     return true;
900   }
901 
902   if (N.getOpcode() != ISD::SELECT_CC ||
903       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
904       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
905     return false;
906 
907   if (TLI.getBooleanContents(N.getValueType()) ==
908       TargetLowering::UndefinedBooleanContent)
909     return false;
910 
911   LHS = N.getOperand(0);
912   RHS = N.getOperand(1);
913   CC  = N.getOperand(4);
914   return true;
915 }
916 
917 /// Return true if this is a SetCC-equivalent operation with only one use.
918 /// If this is true, it allows the users to invert the operation for free when
919 /// it is profitable to do so.
920 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
921   SDValue N0, N1, N2;
922   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
923     return true;
924   return false;
925 }
926 
927 // Returns the SDNode if it is a constant float BuildVector
928 // or constant float.
929 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
930   if (isa<ConstantFPSDNode>(N))
931     return N.getNode();
932   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
933     return N.getNode();
934   return nullptr;
935 }
936 
937 // Determines if it is a constant integer or a build vector of constant
938 // integers (and undefs).
939 // Do not permit build vector implicit truncation.
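// (For instance, a BUILD_VECTOR of i16 elements whose operands carry 32-bit
// constant values is rejected, since using them would implicitly truncate.)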
940 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
941   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
942     return !(Const->isOpaque() && NoOpaques);
943   if (N.getOpcode() != ISD::BUILD_VECTOR)
944     return false;
945   unsigned BitWidth = N.getScalarValueSizeInBits();
946   for (const SDValue &Op : N->op_values()) {
947     if (Op.isUndef())
948       continue;
949     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
950     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
951         (Const->isOpaque() && NoOpaques))
952       return false;
953   }
954   return true;
955 }
956 
957 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
958 // with undefs.
959 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
960   if (V.getOpcode() != ISD::BUILD_VECTOR)
961     return false;
962   return isConstantOrConstantVector(V, NoOpaques) ||
963          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
964 }
965 
966 // Determine if this indexed load's index can be split off, i.e. it is not an opaque target constant.
967 static bool canSplitIdx(LoadSDNode *LD) {
968   return MaySplitLoadIndex &&
969          (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
970           !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
971 }
972 
973 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
974                                                              const SDLoc &DL,
975                                                              SDValue N0,
976                                                              SDValue N1) {
977   // Currently this only tries to ensure we don't undo the GEP splits done by
978   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
979   // we check if the following transformation would be problematic:
980   // (load/store (add, (add, x, offset1), offset2)) ->
981   // (load/store (add, x, offset1+offset2)).
982 
983   if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
984     return false;
985 
986   if (N0.hasOneUse())
987     return false;
988 
989   auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
990   auto *C2 = dyn_cast<ConstantSDNode>(N1);
991   if (!C1 || !C2)
992     return false;
993 
994   const APInt &C1APIntVal = C1->getAPIntValue();
995   const APInt &C2APIntVal = C2->getAPIntValue();
996   if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
997     return false;
998 
999   const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1000   if (CombinedValueIntVal.getBitWidth() > 64)
1001     return false;
1002   const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1003 
1004   for (SDNode *Node : N0->uses()) {
1005     auto LoadStore = dyn_cast<MemSDNode>(Node);
1006     if (LoadStore) {
1007       // Is x[offset2] already not a legal addressing mode? If so then
1008       // reassociating the constants breaks nothing (we test offset2 because
1009       // that's the one we hope to fold into the load or store).
1010       TargetLoweringBase::AddrMode AM;
1011       AM.HasBaseReg = true;
1012       AM.BaseOffs = C2APIntVal.getSExtValue();
1013       EVT VT = LoadStore->getMemoryVT();
1014       unsigned AS = LoadStore->getAddressSpace();
1015       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1016       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1017         continue;
1018 
1019       // Would x[offset1+offset2] still be a legal addressing mode?
1020       AM.BaseOffs = CombinedValue;
1021       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1022         return true;
1023     }
1024   }
1025 
1026   return false;
1027 }
1028 
1029 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1030 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1031 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1032                                                SDValue N0, SDValue N1) {
1033   EVT VT = N0.getValueType();
1034 
1035   if (N0.getOpcode() != Opc)
1036     return SDValue();
1037 
1038   if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1039     if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1040       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1041       if (SDValue OpNode =
1042               DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1043         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1044       return SDValue();
1045     }
1046     if (N0.hasOneUse()) {
1047       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1048       //              iff (op x, c1) has one use
1049       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1050       if (!OpNode.getNode())
1051         return SDValue();
1052       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1053     }
1054   }
1055   return SDValue();
1056 }
1057 
1058 // Try to reassociate commutative binops.
1059 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1060                                     SDValue N1, SDNodeFlags Flags) {
1061   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1062 
1063   // Floating-point reassociation is not allowed without loose FP math.
1064   if (N0.getValueType().isFloatingPoint() ||
1065       N1.getValueType().isFloatingPoint())
1066     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1067       return SDValue();
1068 
1069   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1070     return Combined;
1071   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1072     return Combined;
1073   return SDValue();
1074 }
1075 
1076 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1077                                bool AddTo) {
1078   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1079   ++NodesCombined;
1080   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1081              To[0].getNode()->dump(&DAG);
1082              dbgs() << " and " << NumTo - 1 << " other values\n");
1083   for (unsigned i = 0, e = NumTo; i != e; ++i)
1084     assert((!To[i].getNode() ||
1085             N->getValueType(i) == To[i].getValueType()) &&
1086            "Cannot combine value to value of different type!");
1087 
1088   WorklistRemover DeadNodes(*this);
1089   DAG.ReplaceAllUsesWith(N, To);
1090   if (AddTo) {
1091     // Push the new nodes and any users onto the worklist
1092     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1093       if (To[i].getNode()) {
1094         AddToWorklist(To[i].getNode());
1095         AddUsersToWorklist(To[i].getNode());
1096       }
1097     }
1098   }
1099 
1100   // Finally, if the node is now dead, remove it from the graph.  The node
1101   // may not be dead if the replacement process recursively simplified to
1102   // something else needing this node.
1103   if (N->use_empty())
1104     deleteAndRecombine(N);
1105   return SDValue(N, 0);
1106 }
1107 
1108 void DAGCombiner::
1109 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1110   // Replace the old value with the new one.
1111   ++NodesCombined;
1112   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1113              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1114              dbgs() << '\n');
1115 
1116   // Replace all uses.  If any nodes become isomorphic to other nodes and
1117   // are deleted, make sure to remove them from our worklist.
1118   WorklistRemover DeadNodes(*this);
1119   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1120 
1121   // Push the new node and any (possibly new) users onto the worklist.
1122   AddToWorklistWithUsers(TLO.New.getNode());
1123 
1124   // Finally, if the node is now dead, remove it from the graph.  The node
1125   // may not be dead if the replacement process recursively simplified to
1126   // something else needing this node.
1127   if (TLO.Old.getNode()->use_empty())
1128     deleteAndRecombine(TLO.Old.getNode());
1129 }
1130 
1131 /// Check the specified integer node value to see if it can be simplified or if
1132 /// things it uses can be simplified by bit propagation. If so, return true.
1133 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1134                                        const APInt &DemandedElts,
1135                                        bool AssumeSingleUse) {
1136   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1137   KnownBits Known;
1138   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1139                                 AssumeSingleUse))
1140     return false;
1141 
1142   // Revisit the node.
1143   AddToWorklist(Op.getNode());
1144 
1145   CommitTargetLoweringOpt(TLO);
1146   return true;
1147 }
1148 
1149 /// Check the specified vector node value to see if it can be simplified or
1150 /// if things it uses can be simplified as it only uses some of the elements.
1151 /// If so, return true.
1152 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1153                                              const APInt &DemandedElts,
1154                                              bool AssumeSingleUse) {
1155   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1156   APInt KnownUndef, KnownZero;
1157   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1158                                       TLO, 0, AssumeSingleUse))
1159     return false;
1160 
1161   // Revisit the node.
1162   AddToWorklist(Op.getNode());
1163 
1164   CommitTargetLoweringOpt(TLO);
1165   return true;
1166 }
1167 
1168 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1169   SDLoc DL(Load);
1170   EVT VT = Load->getValueType(0);
1171   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1172 
1173   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1174              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1175   WorklistRemover DeadNodes(*this);
1176   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1177   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1178   deleteAndRecombine(Load);
1179   AddToWorklist(Trunc.getNode());
1180 }
1181 
1182 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1183   Replace = false;
1184   SDLoc DL(Op);
1185   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1186     LoadSDNode *LD = cast<LoadSDNode>(Op);
1187     EVT MemVT = LD->getMemoryVT();
1188     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1189                                                       : LD->getExtensionType();
1190     Replace = true;
1191     return DAG.getExtLoad(ExtType, DL, PVT,
1192                           LD->getChain(), LD->getBasePtr(),
1193                           MemVT, LD->getMemOperand());
1194   }
1195 
1196   unsigned Opc = Op.getOpcode();
1197   switch (Opc) {
1198   default: break;
1199   case ISD::AssertSext:
1200     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1201       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1202     break;
1203   case ISD::AssertZext:
1204     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1205       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1206     break;
1207   case ISD::Constant: {
1208     unsigned ExtOpc =
1209       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1210     return DAG.getNode(ExtOpc, DL, PVT, Op);
1211   }
1212   }
1213 
1214   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1215     return SDValue();
1216   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1217 }
1218 
1219 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1220   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1221     return SDValue();
1222   EVT OldVT = Op.getValueType();
1223   SDLoc DL(Op);
1224   bool Replace = false;
1225   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1226   if (!NewOp.getNode())
1227     return SDValue();
1228   AddToWorklist(NewOp.getNode());
1229 
1230   if (Replace)
1231     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1232   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1233                      DAG.getValueType(OldVT));
1234 }
1235 
1236 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1237   EVT OldVT = Op.getValueType();
1238   SDLoc DL(Op);
1239   bool Replace = false;
1240   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1241   if (!NewOp.getNode())
1242     return SDValue();
1243   AddToWorklist(NewOp.getNode());
1244 
1245   if (Replace)
1246     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1247   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1248 }
1249 
1250 /// Promote the specified integer binary operation if the target indicates it is
1251 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1252 /// i32 since i16 instructions are longer.
1253 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1254   if (!LegalOperations)
1255     return SDValue();
1256 
1257   EVT VT = Op.getValueType();
1258   if (VT.isVector() || !VT.isInteger())
1259     return SDValue();
1260 
1261   // If operation type is 'undesirable', e.g. i16 on x86, consider
1262   // promoting it.
1263   unsigned Opc = Op.getOpcode();
1264   if (TLI.isTypeDesirableForOp(Opc, VT))
1265     return SDValue();
1266 
1267   EVT PVT = VT;
1268   // Consult target whether it is a good idea to promote this operation and
1269   // what's the right type to promote it to.
1270   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1271     assert(PVT != VT && "Don't know what type to promote to!");
1272 
1273     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1274 
1275     bool Replace0 = false;
1276     SDValue N0 = Op.getOperand(0);
1277     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1278 
1279     bool Replace1 = false;
1280     SDValue N1 = Op.getOperand(1);
1281     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1282     SDLoc DL(Op);
1283 
1284     SDValue RV =
1285         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1286 
1287     // We are always replacing N0/N1's use in N and only need additional
1288     // replacements if there are additional uses.
1289     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1290     //       (SDValue) here because the node may reference multiple values
1291     //       (for example, the chain value of a load node).
1292     Replace0 &= !N0->hasOneUse();
1293     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1294 
1295     // Combine Op here so it is preserved past replacements.
1296     CombineTo(Op.getNode(), RV);
1297 
1298     // If operands have a use ordering, make sure we deal with
1299     // predecessor first.
1300     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1301       std::swap(N0, N1);
1302       std::swap(NN0, NN1);
1303     }
1304 
1305     if (Replace0) {
1306       AddToWorklist(NN0.getNode());
1307       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1308     }
1309     if (Replace1) {
1310       AddToWorklist(NN1.getNode());
1311       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1312     }
1313     return Op;
1314   }
1315   return SDValue();
1316 }
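// A worked sketch of PromoteIntBinOp, assuming an x86-like target where TLI
// reports i16 as undesirable and suggests PVT = i32:
//   t0: i16 = add x, y
// becomes
//   t1: i32 = add (any_extend x), (any_extend y)
//   t0: i16 = truncate t1
// The truncate keeps the replacement type-correct; the i32 form is expected
// to encode more cheaply than the prefix-laden i16 instruction.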
1317 
1318 /// Promote the specified integer shift operation if the target indicates it is
1319 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1320 /// i32 since i16 instructions are longer.
1321 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1322   if (!LegalOperations)
1323     return SDValue();
1324 
1325   EVT VT = Op.getValueType();
1326   if (VT.isVector() || !VT.isInteger())
1327     return SDValue();
1328 
1329   // If operation type is 'undesirable', e.g. i16 on x86, consider
1330   // promoting it.
1331   unsigned Opc = Op.getOpcode();
1332   if (TLI.isTypeDesirableForOp(Opc, VT))
1333     return SDValue();
1334 
1335   EVT PVT = VT;
1336   // Consult target whether it is a good idea to promote this operation and
1337   // what's the right type to promote it to.
1338   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1339     assert(PVT != VT && "Don't know what type to promote to!");
1340 
1341     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1342 
1343     bool Replace = false;
1344     SDValue N0 = Op.getOperand(0);
1345     SDValue N1 = Op.getOperand(1);
1346     if (Opc == ISD::SRA)
1347       N0 = SExtPromoteOperand(N0, PVT);
1348     else if (Opc == ISD::SRL)
1349       N0 = ZExtPromoteOperand(N0, PVT);
1350     else
1351       N0 = PromoteOperand(N0, PVT, Replace);
1352 
1353     if (!N0.getNode())
1354       return SDValue();
1355 
1356     SDLoc DL(Op);
1357     SDValue RV =
1358         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1359 
1360     if (Replace)
1361       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1362 
1363     // Deal with Op being deleted.
1364     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1365       return RV;
1366   }
1367   return SDValue();
1368 }
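// A worked sketch of PromoteIntShiftOp for a logical shift right, assuming
// i16 -> i32 promotion: the shifted value must be zero-extended so that the
// bits shifted in from the widened high half are zero:
//   t0: i16 = srl x, 3
// becomes
//   t1: i32 = srl (zext_in_reg (any_extend x), i16), 3
//   t0: i16 = truncate t1
// SRA uses sign_extend_inreg instead, and the shift amount N1 is unchanged.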
1369 
1370 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1371   if (!LegalOperations)
1372     return SDValue();
1373 
1374   EVT VT = Op.getValueType();
1375   if (VT.isVector() || !VT.isInteger())
1376     return SDValue();
1377 
1378   // If operation type is 'undesirable', e.g. i16 on x86, consider
1379   // promoting it.
1380   unsigned Opc = Op.getOpcode();
1381   if (TLI.isTypeDesirableForOp(Opc, VT))
1382     return SDValue();
1383 
1384   EVT PVT = VT;
1385   // Consult target whether it is a good idea to promote this operation and
1386   // what's the right type to promote it to.
1387   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1388     assert(PVT != VT && "Don't know what type to promote to!");
1389     // fold (aext (aext x)) -> (aext x)
1390     // fold (aext (zext x)) -> (zext x)
1391     // fold (aext (sext x)) -> (sext x)
1392     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1393     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1394   }
1395   return SDValue();
1396 }
1397 
1398 bool DAGCombiner::PromoteLoad(SDValue Op) {
1399   if (!LegalOperations)
1400     return false;
1401 
1402   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1403     return false;
1404 
1405   EVT VT = Op.getValueType();
1406   if (VT.isVector() || !VT.isInteger())
1407     return false;
1408 
1409   // If operation type is 'undesirable', e.g. i16 on x86, consider
1410   // promoting it.
1411   unsigned Opc = Op.getOpcode();
1412   if (TLI.isTypeDesirableForOp(Opc, VT))
1413     return false;
1414 
1415   EVT PVT = VT;
1416   // Consult target whether it is a good idea to promote this operation and
1417   // what's the right type to promote it to.
1418   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1419     assert(PVT != VT && "Don't know what type to promote to!");
1420 
1421     SDLoc DL(Op);
1422     SDNode *N = Op.getNode();
1423     LoadSDNode *LD = cast<LoadSDNode>(N);
1424     EVT MemVT = LD->getMemoryVT();
1425     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1426                                                       : LD->getExtensionType();
1427     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1428                                    LD->getChain(), LD->getBasePtr(),
1429                                    MemVT, LD->getMemOperand());
1430     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1431 
1432     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1433                Result.getNode()->dump(&DAG); dbgs() << '\n');
1434     WorklistRemover DeadNodes(*this);
1435     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1436     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1437     deleteAndRecombine(N);
1438     AddToWorklist(Result.getNode());
1439     return true;
1440   }
1441   return false;
1442 }
1443 
1444 /// Recursively delete a node which has no uses and any operands for
1445 /// which it is the only use.
1446 ///
1447 /// Note that this both deletes the nodes and removes them from the worklist.
1448 /// It also adds any nodes that have had a user deleted to the worklist, as they
1449 /// may now have only one use and be subject to other combines.
1450 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1451   if (!N->use_empty())
1452     return false;
1453 
1454   SmallSetVector<SDNode *, 16> Nodes;
1455   Nodes.insert(N);
1456   do {
1457     N = Nodes.pop_back_val();
1458     if (!N)
1459       continue;
1460 
1461     if (N->use_empty()) {
1462       for (const SDValue &ChildN : N->op_values())
1463         Nodes.insert(ChildN.getNode());
1464 
1465       removeFromWorklist(N);
1466       DAG.DeleteNode(N);
1467     } else {
1468       AddToWorklist(N);
1469     }
1470   } while (!Nodes.empty());
1471   return true;
1472 }
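// For example (a sketch): if an unused (trunc (add x, y)) is deleted, the add
// may in turn become use-free and is deleted in the same walk, while x and y
// are re-queued on the worklist since losing a user may expose new combines.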
1473 
1474 //===----------------------------------------------------------------------===//
1475 //  Main DAG Combiner implementation
1476 //===----------------------------------------------------------------------===//
1477 
1478 void DAGCombiner::Run(CombineLevel AtLevel) {
1479   // Set the instance variables, so that the various visit routines may use them.
1480   Level = AtLevel;
1481   LegalDAG = Level >= AfterLegalizeDAG;
1482   LegalOperations = Level >= AfterLegalizeVectorOps;
1483   LegalTypes = Level >= AfterLegalizeTypes;
1484 
1485   WorklistInserter AddNodes(*this);
1486 
1487   // Add all the dag nodes to the worklist.
1488   for (SDNode &Node : DAG.allnodes())
1489     AddToWorklist(&Node);
1490 
1491   // Create a dummy node (which is not added to allnodes) that holds a reference
1492   // to the root node, preventing it from being deleted and tracking any
1493   // changes to the root.
1494   HandleSDNode Dummy(DAG.getRoot());
1495 
1496   // While we have a valid worklist entry node, try to combine it.
1497   while (SDNode *N = getNextWorklistEntry()) {
1498     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1499     // N is deleted from the DAG, since they too may now be dead or may have a
1500     // reduced number of uses, allowing other xforms.
1501     if (recursivelyDeleteUnusedNodes(N))
1502       continue;
1503 
1504     WorklistRemover DeadNodes(*this);
1505 
1506     // If this combine is running after legalizing the DAG, re-legalize any
1507     // nodes pulled off the worklist.
1508     if (LegalDAG) {
1509       SmallSetVector<SDNode *, 16> UpdatedNodes;
1510       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1511 
1512       for (SDNode *LN : UpdatedNodes)
1513         AddToWorklistWithUsers(LN);
1514 
1515       if (!NIsValid)
1516         continue;
1517     }
1518 
1519     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1520 
1521     // Add any operands of the new node which have not yet been combined to the
1522     // worklist as well. Because the worklist uniques things already, this
1523     // won't repeatedly process the same operand.
1524     CombinedNodes.insert(N);
1525     for (const SDValue &ChildN : N->op_values())
1526       if (!CombinedNodes.count(ChildN.getNode()))
1527         AddToWorklist(ChildN.getNode());
1528 
1529     SDValue RV = combine(N);
1530 
1531     if (!RV.getNode())
1532       continue;
1533 
1534     ++NodesCombined;
1535 
1536     // If we get back the same node we passed in, rather than a new node or
1537     // zero, we know that the node must have defined multiple values and
1538     // CombineTo was used.  Since CombineTo takes care of the worklist
1539     // mechanics for us, we have no work to do in this case.
1540     if (RV.getNode() == N)
1541       continue;
1542 
1543     assert(N->getOpcode() != ISD::DELETED_NODE &&
1544            RV.getOpcode() != ISD::DELETED_NODE &&
1545            "Node was deleted but visit returned new node!");
1546 
1547     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1548 
1549     if (N->getNumValues() == RV.getNode()->getNumValues())
1550       DAG.ReplaceAllUsesWith(N, RV.getNode());
1551     else {
1552       assert(N->getValueType(0) == RV.getValueType() &&
1553              N->getNumValues() == 1 && "Type mismatch");
1554       DAG.ReplaceAllUsesWith(N, &RV);
1555     }
1556 
1557     // Push the new node and any users onto the worklist
1558     AddToWorklist(RV.getNode());
1559     AddUsersToWorklist(RV.getNode());
1560 
1561     // Finally, if the node is now dead, remove it from the graph.  The node
1562     // may not be dead if the replacement process recursively simplified to
1563     // something else needing this node. This will also take care of adding any
1564     // operands which have lost a user to the worklist.
1565     recursivelyDeleteUnusedNodes(N);
1566   }
1567 
1568   // If the root changed (e.g. it was a dead load), update the root.
1569   DAG.setRoot(Dummy.getValue());
1570   DAG.RemoveDeadNodes();
1571 }
1572 
1573 SDValue DAGCombiner::visit(SDNode *N) {
1574   switch (N->getOpcode()) {
1575   default: break;
1576   case ISD::TokenFactor:        return visitTokenFactor(N);
1577   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1578   case ISD::ADD:                return visitADD(N);
1579   case ISD::SUB:                return visitSUB(N);
1580   case ISD::SADDSAT:
1581   case ISD::UADDSAT:            return visitADDSAT(N);
1582   case ISD::SSUBSAT:
1583   case ISD::USUBSAT:            return visitSUBSAT(N);
1584   case ISD::ADDC:               return visitADDC(N);
1585   case ISD::SADDO:
1586   case ISD::UADDO:              return visitADDO(N);
1587   case ISD::SUBC:               return visitSUBC(N);
1588   case ISD::SSUBO:
1589   case ISD::USUBO:              return visitSUBO(N);
1590   case ISD::ADDE:               return visitADDE(N);
1591   case ISD::ADDCARRY:           return visitADDCARRY(N);
1592   case ISD::SUBE:               return visitSUBE(N);
1593   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1594   case ISD::SMULFIX:
1595   case ISD::SMULFIXSAT:
1596   case ISD::UMULFIX:
1597   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1598   case ISD::MUL:                return visitMUL(N);
1599   case ISD::SDIV:               return visitSDIV(N);
1600   case ISD::UDIV:               return visitUDIV(N);
1601   case ISD::SREM:
1602   case ISD::UREM:               return visitREM(N);
1603   case ISD::MULHU:              return visitMULHU(N);
1604   case ISD::MULHS:              return visitMULHS(N);
1605   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1606   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1607   case ISD::SMULO:
1608   case ISD::UMULO:              return visitMULO(N);
1609   case ISD::SMIN:
1610   case ISD::SMAX:
1611   case ISD::UMIN:
1612   case ISD::UMAX:               return visitIMINMAX(N);
1613   case ISD::AND:                return visitAND(N);
1614   case ISD::OR:                 return visitOR(N);
1615   case ISD::XOR:                return visitXOR(N);
1616   case ISD::SHL:                return visitSHL(N);
1617   case ISD::SRA:                return visitSRA(N);
1618   case ISD::SRL:                return visitSRL(N);
1619   case ISD::ROTR:
1620   case ISD::ROTL:               return visitRotate(N);
1621   case ISD::FSHL:
1622   case ISD::FSHR:               return visitFunnelShift(N);
1623   case ISD::ABS:                return visitABS(N);
1624   case ISD::BSWAP:              return visitBSWAP(N);
1625   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1626   case ISD::CTLZ:               return visitCTLZ(N);
1627   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1628   case ISD::CTTZ:               return visitCTTZ(N);
1629   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1630   case ISD::CTPOP:              return visitCTPOP(N);
1631   case ISD::SELECT:             return visitSELECT(N);
1632   case ISD::VSELECT:            return visitVSELECT(N);
1633   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1634   case ISD::SETCC:              return visitSETCC(N);
1635   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1636   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1637   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1638   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1639   case ISD::AssertSext:
1640   case ISD::AssertZext:         return visitAssertExt(N);
1641   case ISD::AssertAlign:        return visitAssertAlign(N);
1642   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1643   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1644   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1645   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1646   case ISD::BITCAST:            return visitBITCAST(N);
1647   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1648   case ISD::FADD:               return visitFADD(N);
1649   case ISD::FSUB:               return visitFSUB(N);
1650   case ISD::FMUL:               return visitFMUL(N);
1651   case ISD::FMA:                return visitFMA(N);
1652   case ISD::FDIV:               return visitFDIV(N);
1653   case ISD::FREM:               return visitFREM(N);
1654   case ISD::FSQRT:              return visitFSQRT(N);
1655   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1656   case ISD::FPOW:               return visitFPOW(N);
1657   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1658   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1659   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1660   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1661   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1662   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1663   case ISD::FNEG:               return visitFNEG(N);
1664   case ISD::FABS:               return visitFABS(N);
1665   case ISD::FFLOOR:             return visitFFLOOR(N);
1666   case ISD::FMINNUM:            return visitFMINNUM(N);
1667   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1668   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1669   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1670   case ISD::FCEIL:              return visitFCEIL(N);
1671   case ISD::FTRUNC:             return visitFTRUNC(N);
1672   case ISD::BRCOND:             return visitBRCOND(N);
1673   case ISD::BR_CC:              return visitBR_CC(N);
1674   case ISD::LOAD:               return visitLOAD(N);
1675   case ISD::STORE:              return visitSTORE(N);
1676   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1677   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1678   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1679   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1680   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1681   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1682   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1683   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1684   case ISD::MGATHER:            return visitMGATHER(N);
1685   case ISD::MLOAD:              return visitMLOAD(N);
1686   case ISD::MSCATTER:           return visitMSCATTER(N);
1687   case ISD::MSTORE:             return visitMSTORE(N);
1688   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1689   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1690   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1691   case ISD::FREEZE:             return visitFREEZE(N);
1692   case ISD::VECREDUCE_FADD:
1693   case ISD::VECREDUCE_FMUL:
1694   case ISD::VECREDUCE_ADD:
1695   case ISD::VECREDUCE_MUL:
1696   case ISD::VECREDUCE_AND:
1697   case ISD::VECREDUCE_OR:
1698   case ISD::VECREDUCE_XOR:
1699   case ISD::VECREDUCE_SMAX:
1700   case ISD::VECREDUCE_SMIN:
1701   case ISD::VECREDUCE_UMAX:
1702   case ISD::VECREDUCE_UMIN:
1703   case ISD::VECREDUCE_FMAX:
1704   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1705   }
1706   return SDValue();
1707 }
1708 
1709 SDValue DAGCombiner::combine(SDNode *N) {
1710   SDValue RV;
1711   if (!DisableGenericCombines)
1712     RV = visit(N);
1713 
1714   // If nothing happened, try a target-specific DAG combine.
1715   if (!RV.getNode()) {
1716     assert(N->getOpcode() != ISD::DELETED_NODE &&
1717            "Node was deleted but visit returned NULL!");
1718 
1719     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1720         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1721 
1722       // Expose the DAG combiner to the target combiner impls.
1723       TargetLowering::DAGCombinerInfo
1724         DagCombineInfo(DAG, Level, false, this);
1725 
1726       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1727     }
1728   }
1729 
1730   // If nothing happened still, try promoting the operation.
1731   if (!RV.getNode()) {
1732     switch (N->getOpcode()) {
1733     default: break;
1734     case ISD::ADD:
1735     case ISD::SUB:
1736     case ISD::MUL:
1737     case ISD::AND:
1738     case ISD::OR:
1739     case ISD::XOR:
1740       RV = PromoteIntBinOp(SDValue(N, 0));
1741       break;
1742     case ISD::SHL:
1743     case ISD::SRA:
1744     case ISD::SRL:
1745       RV = PromoteIntShiftOp(SDValue(N, 0));
1746       break;
1747     case ISD::SIGN_EXTEND:
1748     case ISD::ZERO_EXTEND:
1749     case ISD::ANY_EXTEND:
1750       RV = PromoteExtend(SDValue(N, 0));
1751       break;
1752     case ISD::LOAD:
1753       if (PromoteLoad(SDValue(N, 0)))
1754         RV = SDValue(N, 0);
1755       break;
1756     }
1757   }
1758 
1759   // If N is a commutative binary node, try to eliminate it if the commuted
1760   // version is already present in the DAG.
1761   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1762       N->getNumValues() == 1) {
1763     SDValue N0 = N->getOperand(0);
1764     SDValue N1 = N->getOperand(1);
1765 
1766     // Constant operands are canonicalized to RHS.
1767     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1768       SDValue Ops[] = {N1, N0};
1769       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1770                                             N->getFlags());
1771       if (CSENode)
1772         return SDValue(CSENode, 0);
1773     }
1774   }
1775 
1776   return RV;
1777 }
1778 
1779 /// Given a node, return its input chain if it has one, otherwise return a null
1780 /// sd operand.
1781 static SDValue getInputChainForNode(SDNode *N) {
1782   if (unsigned NumOps = N->getNumOperands()) {
1783     if (N->getOperand(0).getValueType() == MVT::Other)
1784       return N->getOperand(0);
1785     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1786       return N->getOperand(NumOps-1);
1787     for (unsigned i = 1; i < NumOps-1; ++i)
1788       if (N->getOperand(i).getValueType() == MVT::Other)
1789         return N->getOperand(i);
1790   }
1791   return SDValue();
1792 }
1793 
1794 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1795   // If N has two operands, where one has an input chain equal to the other,
1796   // the 'other' chain is redundant.
1797   if (N->getNumOperands() == 2) {
1798     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1799       return N->getOperand(0);
1800     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1801       return N->getOperand(1);
1802   }
1803 
1804   // Don't simplify token factors if optnone.
1805   if (OptLevel == CodeGenOpt::None)
1806     return SDValue();
1807 
1808   // If the sole user is a token factor, we should make sure we have a
1809   // chance to merge them together. This prevents TF chains from inhibiting
1810   // optimizations.
1811   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1812     AddToWorklist(*(N->use_begin()));
1813 
1814   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1815   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1816   SmallPtrSet<SDNode*, 16> SeenOps;
1817   bool Changed = false;             // If we should replace this token factor.
1818 
1819   // Start out with this token factor.
1820   TFs.push_back(N);
1821 
1822   // Iterate through token factors. The TFs list grows when new token factors
1823   // are encountered.
1824   for (unsigned i = 0; i < TFs.size(); ++i) {
1825     // Limit number of nodes to inline, to avoid quadratic compile times.
1826     // We have to add the outstanding Token Factors to Ops, otherwise we might
1827     // drop Ops from the resulting Token Factors.
1828     if (Ops.size() > TokenFactorInlineLimit) {
1829       for (unsigned j = i; j < TFs.size(); j++)
1830         Ops.emplace_back(TFs[j], 0);
1831       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1832       // combiner worklist later.
1833       TFs.resize(i);
1834       break;
1835     }
1836 
1837     SDNode *TF = TFs[i];
1838     // Check each of the operands.
1839     for (const SDValue &Op : TF->op_values()) {
1840       switch (Op.getOpcode()) {
1841       case ISD::EntryToken:
1842         // Entry tokens don't need to be added to the list. They are
1843         // redundant.
1844         Changed = true;
1845         break;
1846 
1847       case ISD::TokenFactor:
1848         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1849           // Queue up for processing.
1850           TFs.push_back(Op.getNode());
1851           Changed = true;
1852           break;
1853         }
1854         LLVM_FALLTHROUGH;
1855 
1856       default:
1857         // Only add if it isn't already in the list.
1858         if (SeenOps.insert(Op.getNode()).second)
1859           Ops.push_back(Op);
1860         else
1861           Changed = true;
1862         break;
1863       }
1864     }
1865   }
1866 
1867   // Re-visit inlined Token Factors, to clean them up in case they have been
1868   // removed. Skip the first Token Factor, as this is the current node.
1869   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1870     AddToWorklist(TFs[i]);
1871 
1872   // Remove nodes that are chained to another node in the list. Do so by
1873   // walking up chains breadth-first, stopping when we've seen another
1874   // operand. In general we must climb to the EntryNode, but we can exit
1875   // early if we find that all remaining work is associated with just one
1876   // operand, as no further pruning is possible.
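// A worked sketch of the pruning, assuming ld2's own input chain is ld1:
//   TokenFactor ld1, ld2
// Walking up from ld2 reaches ld1, a fellow operand, so ld1 is pruned: the
// chain edge already orders ld1 before ld2, making the TokenFactor operand
// redundant.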
1877 
1878   // List of nodes to search through and original Ops from which they originate.
1879   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1880   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1881   SmallPtrSet<SDNode *, 16> SeenChains;
1882   bool DidPruneOps = false;
1883 
1884   unsigned NumLeftToConsider = 0;
1885   for (const SDValue &Op : Ops) {
1886     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1887     OpWorkCount.push_back(1);
1888   }
1889 
1890   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1891     // If this is an Op, we can remove the op from the list. Re-mark any
1892     // search associated with it as being from the current OpNumber.
1893     if (SeenOps.count(Op) != 0) {
1894       Changed = true;
1895       DidPruneOps = true;
1896       unsigned OrigOpNumber = 0;
1897       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1898         OrigOpNumber++;
1899       assert((OrigOpNumber != Ops.size()) &&
1900              "expected to find TokenFactor Operand");
1901       // Re-mark worklist from OrigOpNumber to OpNumber
1902       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1903         if (Worklist[i].second == OrigOpNumber) {
1904           Worklist[i].second = OpNumber;
1905         }
1906       }
1907       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1908       OpWorkCount[OrigOpNumber] = 0;
1909       NumLeftToConsider--;
1910     }
1911     // Add if it's a new chain
1912     if (SeenChains.insert(Op).second) {
1913       OpWorkCount[OpNumber]++;
1914       Worklist.push_back(std::make_pair(Op, OpNumber));
1915     }
1916   };
1917 
1918   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1919     // We need to consider at least 2 Ops to prune.
1920     if (NumLeftToConsider <= 1)
1921       break;
1922     auto CurNode = Worklist[i].first;
1923     auto CurOpNumber = Worklist[i].second;
1924     assert((OpWorkCount[CurOpNumber] > 0) &&
1925            "Node should not appear in worklist");
1926     switch (CurNode->getOpcode()) {
1927     case ISD::EntryToken:
1928       // Hitting EntryToken is the only way for the search to terminate
1929       // without hitting another operand's search. Prevent us from marking
1930       // this operand considered.
1932       NumLeftToConsider++;
1933       break;
1934     case ISD::TokenFactor:
1935       for (const SDValue &Op : CurNode->op_values())
1936         AddToWorklist(i, Op.getNode(), CurOpNumber);
1937       break;
1938     case ISD::LIFETIME_START:
1939     case ISD::LIFETIME_END:
1940     case ISD::CopyFromReg:
1941     case ISD::CopyToReg:
1942       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1943       break;
1944     default:
1945       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1946         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1947       break;
1948     }
1949     OpWorkCount[CurOpNumber]--;
1950     if (OpWorkCount[CurOpNumber] == 0)
1951       NumLeftToConsider--;
1952   }
1953 
1954   // If we've changed things around then replace token factor.
1955   if (Changed) {
1956     SDValue Result;
1957     if (Ops.empty()) {
1958       // The entry token is the only possible outcome.
1959       Result = DAG.getEntryNode();
1960     } else {
1961       if (DidPruneOps) {
1962         SmallVector<SDValue, 8> PrunedOps;
1963         // Keep only ops not reached while walking another op's chain.
1964         for (const SDValue &Op : Ops) {
1965           if (SeenChains.count(Op.getNode()) == 0)
1966             PrunedOps.push_back(Op);
1967         }
1968         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1969       } else {
1970         Result = DAG.getTokenFactor(SDLoc(N), Ops);
1971       }
1972     }
1973     return Result;
1974   }
1975   return SDValue();
1976 }
1977 
1978 /// MERGE_VALUES can always be eliminated.
1979 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1980   WorklistRemover DeadNodes(*this);
1981   // Replacing results may cause a different MERGE_VALUES to suddenly
1982   // be CSE'd with N, and carry its uses with it. Iterate until no
1983   // uses remain, to ensure that the node can be safely deleted.
1984   // First add the users of this node to the work list so that they
1985   // can be tried again once they have new operands.
1986   AddUsersToWorklist(N);
1987   do {
1988     // Do as a single replacement to avoid rewalking use lists.
1989     SmallVector<SDValue, 8> Ops;
1990     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1991       Ops.push_back(N->getOperand(i));
1992     DAG.ReplaceAllUsesWith(N, Ops.data());
1993   } while (!N->use_empty());
1994   deleteAndRecombine(N);
1995   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1996 }
1997 
1998 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1999 /// ConstantSDNode pointer; otherwise return nullptr.
2000 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2001   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2002   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2003 }
2004 
2005 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2006   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2007          "Unexpected binary operator");
2008 
2009   // Don't do this unless the old select is going away. We want to eliminate the
2010   // binary operator, not replace a binop with a select.
2011   // TODO: Handle ISD::SELECT_CC.
2012   unsigned SelOpNo = 0;
2013   SDValue Sel = BO->getOperand(0);
2014   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2015     SelOpNo = 1;
2016     Sel = BO->getOperand(1);
2017   }
2018 
2019   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2020     return SDValue();
2021 
2022   SDValue CT = Sel.getOperand(1);
2023   if (!isConstantOrConstantVector(CT, true) &&
2024       !isConstantFPBuildVectorOrConstantFP(CT))
2025     return SDValue();
2026 
2027   SDValue CF = Sel.getOperand(2);
2028   if (!isConstantOrConstantVector(CF, true) &&
2029       !isConstantFPBuildVectorOrConstantFP(CF))
2030     return SDValue();
2031 
2032   // Bail out if any constants are opaque because we can't constant fold those.
2033   // The exception is "and" and "or" with either 0 or -1, in which case we can
2034   // propagate non-constant operands into the select. I.e.:
2035   // and (select Cond, 0, -1), X --> select Cond, 0, X
2036   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2037   auto BinOpcode = BO->getOpcode();
2038   bool CanFoldNonConst =
2039       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2040       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2041       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2042 
2043   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2044   if (!CanFoldNonConst &&
2045       !isConstantOrConstantVector(CBO, true) &&
2046       !isConstantFPBuildVectorOrConstantFP(CBO))
2047     return SDValue();
2048 
2049   EVT VT = Sel.getValueType();
2050 
2051   // In the case of a shift, the value and the shift amount may have different
2052   // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
2053   // type. Bail out if we have swapped operands and the value types do not
2054   // match. NB: x86 is fine if the operands are not swapped and the shift
2055   // amount's VT is no bigger than the shifted value's. TODO: it is possible to
2056   // check for a shift operation, correct the VTs, and still do this on x86.
2057   if (SelOpNo && VT != CBO.getValueType())
2058     return SDValue();
2059 
2060   // We have a select-of-constants followed by a binary operator with a
2061   // constant. Eliminate the binop by pulling the constant math into the select.
2062   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2063   SDLoc DL(Sel);
2064   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2065                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2066   if (!CanFoldNonConst && !NewCT.isUndef() &&
2067       !isConstantOrConstantVector(NewCT, true) &&
2068       !isConstantFPBuildVectorOrConstantFP(NewCT))
2069     return SDValue();
2070 
2071   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2072                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2073   if (!CanFoldNonConst && !NewCF.isUndef() &&
2074       !isConstantOrConstantVector(NewCF, true) &&
2075       !isConstantFPBuildVectorOrConstantFP(NewCF))
2076     return SDValue();
2077 
2078   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2079   SelectOp->setFlags(BO->getFlags());
2080   return SelectOp;
2081 }
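// A worked sketch of foldBinOpIntoSelect with made-up i32 constants:
//   add (select Cond, 2, 5), 3 --> select Cond, 5, 8
// Both arms fold to constants, so the add disappears entirely and the new
// select inherits the binop's flags.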
2082 
2083 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2084   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2085          "Expecting add or sub");
2086 
2087   // Match a constant operand and a zext operand for the math instruction:
2088   // add Z, C
2089   // sub C, Z
2090   bool IsAdd = N->getOpcode() == ISD::ADD;
2091   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2092   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2093   auto *CN = dyn_cast<ConstantSDNode>(C);
2094   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2095     return SDValue();
2096 
2097   // Match the zext operand as a setcc of a boolean.
2098   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2099       Z.getOperand(0).getValueType() != MVT::i1)
2100     return SDValue();
2101 
2102   // Match the compare as: setcc (X & 1), 0, eq.
2103   SDValue SetCC = Z.getOperand(0);
2104   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2105   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2106       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2107       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2108     return SDValue();
2109 
2110   // We are adding/subtracting a constant and an inverted low bit. Turn that
2111   // into a subtract/add of the low bit with incremented/decremented constant:
2112   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2113   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2114   EVT VT = C.getValueType();
2115   SDLoc DL(N);
2116   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2117   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2118                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2119   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2120 }
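// A worked sketch with an assumed constant C = 10 and i32 X:
//   add (zext i1 (seteq (and X, 1), 0)), 10 --> sub 11, (zext (and X, 1))
// Both forms yield 11 when X is even (low bit clear) and 10 when X is odd.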
2121 
2122 /// Try to fold a 'not' of a shifted sign bit, added to or subtracted from a
2123 /// constant operand, into a shift and an add with a different constant.
2124 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2125   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2126          "Expecting add or sub");
2127 
2128   // We need a constant operand for the add/sub, and the other operand is a
2129   // logical shift right: add (srl), C or sub C, (srl).
2130   bool IsAdd = N->getOpcode() == ISD::ADD;
2131   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2132   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2133   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2134       ShiftOp.getOpcode() != ISD::SRL)
2135     return SDValue();
2136 
2137   // The shift must be of a 'not' value.
2138   SDValue Not = ShiftOp.getOperand(0);
2139   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2140     return SDValue();
2141 
2142   // The shift must be moving the sign bit to the least-significant-bit.
2143   EVT VT = ShiftOp.getValueType();
2144   SDValue ShAmt = ShiftOp.getOperand(1);
2145   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2146   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2147     return SDValue();
2148 
2149   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2150   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2151   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2152   SDLoc DL(N);
2153   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2154   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2155   if (SDValue NewC =
2156           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2157                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2158     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2159   return SDValue();
2160 }
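// A worked sketch for i32 with an assumed constant C = 5:
//   add (srl (not X), 31), 5 --> add (sra X, 31), 6
// For negative X: srl(not X, 31) == 0, so the left side is 5, and
// sra(X, 31) == -1, so the right side is -1 + 6 == 5. Non-negative X gives 6
// on both sides.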
2161 
2162 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2163 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2164 /// are no common bits set in the operands).
2165 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2166   SDValue N0 = N->getOperand(0);
2167   SDValue N1 = N->getOperand(1);
2168   EVT VT = N0.getValueType();
2169   SDLoc DL(N);
2170 
2171   // fold vector ops
2172   if (VT.isVector()) {
2173     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2174       return FoldedVOp;
2175 
2176     // fold (add x, 0) -> x, vector edition
2177     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2178       return N0;
2179     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2180       return N1;
2181   }
2182 
2183   // fold (add x, undef) -> undef
2184   if (N0.isUndef())
2185     return N0;
2186 
2187   if (N1.isUndef())
2188     return N1;
2189 
2190   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2191     // canonicalize constant to RHS
2192     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2193       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2194     // fold (add c1, c2) -> c1+c2
2195     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2196   }
2197 
2198   // fold (add x, 0) -> x
2199   if (isNullConstant(N1))
2200     return N0;
2201 
2202   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2203     // fold ((A-c1)+c2) -> (A+(c2-c1))
2204     if (N0.getOpcode() == ISD::SUB &&
2205         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2206       SDValue Sub =
2207           DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2208       assert(Sub && "Constant folding failed");
2209       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2210     }
2211 
2212     // fold ((c1-A)+c2) -> (c1+c2)-A
2213     if (N0.getOpcode() == ISD::SUB &&
2214         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2215       SDValue Add =
2216           DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2217       assert(Add && "Constant folding failed");
2218       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2219     }
2220 
2221     // add (sext i1 X), 1 -> zext (not i1 X)
2222     // We don't transform this pattern:
2223     //   add (zext i1 X), -1 -> sext (not i1 X)
2224     // because most (?) targets generate better code for the zext form.
2225     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2226         isOneOrOneSplat(N1)) {
2227       SDValue X = N0.getOperand(0);
2228       if ((!LegalOperations ||
2229            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2230             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2231           X.getScalarValueSizeInBits() == 1) {
2232         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2233         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2234       }
2235     }
2236 
2237     // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2238     // equivalent to (add x, c0).
2239     if (N0.getOpcode() == ISD::OR &&
2240         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2241         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2242       if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2243                                                     {N1, N0.getOperand(1)}))
2244         return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2245     }
2246   }
2247 
2248   if (SDValue NewSel = foldBinOpIntoSelect(N))
2249     return NewSel;
2250 
2251   // reassociate add
2252   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2253     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2254       return RADD;
2255   }
2256   // fold ((0-A) + B) -> B-A
2257   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2258     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2259 
2260   // fold (A + (0-B)) -> A-B
2261   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2262     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2263 
2264   // fold (A+(B-A)) -> B
2265   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2266     return N1.getOperand(0);
2267 
2268   // fold ((B-A)+A) -> B
2269   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2270     return N0.getOperand(0);
2271 
2272   // fold ((A-B)+(C-A)) -> (C-B)
2273   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2274       N0.getOperand(0) == N1.getOperand(1))
2275     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2276                        N0.getOperand(1));
2277 
2278   // fold ((A-B)+(B-C)) -> (A-C)
2279   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2280       N0.getOperand(1) == N1.getOperand(0))
2281     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2282                        N1.getOperand(1));
2283 
2284   // fold (A+(B-(A+C))) to (B-C)
2285   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2286       N0 == N1.getOperand(1).getOperand(0))
2287     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2288                        N1.getOperand(1).getOperand(1));
2289 
2290   // fold (A+(B-(C+A))) to (B-C)
2291   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2292       N0 == N1.getOperand(1).getOperand(1))
2293     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2294                        N1.getOperand(1).getOperand(0));
2295 
2296   // fold (A+((B-A)+or-C)) to (B+or-C)
2297   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2298       N1.getOperand(0).getOpcode() == ISD::SUB &&
2299       N0 == N1.getOperand(0).getOperand(1))
2300     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2301                        N1.getOperand(1));
2302 
2303   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2304   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2305     SDValue N00 = N0.getOperand(0);
2306     SDValue N01 = N0.getOperand(1);
2307     SDValue N10 = N1.getOperand(0);
2308     SDValue N11 = N1.getOperand(1);
2309 
2310     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2311       return DAG.getNode(ISD::SUB, DL, VT,
2312                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2313                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2314   }
2315 
2316   // fold (add (umax X, C), -C) --> (usubsat X, C)
2317   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2318     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2319       return (!Max && !Op) ||
2320              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2321     };
2322     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2323                                   /*AllowUndefs*/ true))
2324       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2325                          N0.getOperand(1));
2326   }
2327 
2328   if (SimplifyDemandedBits(SDValue(N, 0)))
2329     return SDValue(N, 0);
2330 
2331   if (isOneOrOneSplat(N1)) {
2332     // fold (add (xor a, -1), 1) -> (sub 0, a)
2333     if (isBitwiseNot(N0))
2334       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2335                          N0.getOperand(0));
2336 
2337     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2338     if (N0.getOpcode() == ISD::ADD ||
2339         N0.getOpcode() == ISD::UADDO ||
2340         N0.getOpcode() == ISD::SADDO) {
2341       SDValue A, Xor;
2342 
2343       if (isBitwiseNot(N0.getOperand(0))) {
2344         A = N0.getOperand(1);
2345         Xor = N0.getOperand(0);
2346       } else if (isBitwiseNot(N0.getOperand(1))) {
2347         A = N0.getOperand(0);
2348         Xor = N0.getOperand(1);
2349       }
2350 
2351       if (Xor)
2352         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2353     }
2354 
2355     // Look for:
2356     //   add (add x, y), 1
2357     // And if the target does not like this form then turn into:
2358     //   sub y, (xor x, -1)
2359     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2360         N0.getOpcode() == ISD::ADD) {
2361       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2362                                 DAG.getAllOnesConstant(DL, VT));
2363       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2364     }
2365   }
2366 
2367   // (x - y) + -1  ->  add (xor y, -1), x
2368   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2369       isAllOnesOrAllOnesSplat(N1)) {
2370     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2371     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2372   }
2373 
2374   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2375     return Combined;
2376 
2377   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2378     return Combined;
2379 
2380   return SDValue();
2381 }
2382 
2383 SDValue DAGCombiner::visitADD(SDNode *N) {
2384   SDValue N0 = N->getOperand(0);
2385   SDValue N1 = N->getOperand(1);
2386   EVT VT = N0.getValueType();
2387   SDLoc DL(N);
2388 
2389   if (SDValue Combined = visitADDLike(N))
2390     return Combined;
2391 
2392   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2393     return V;
2394 
2395   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2396     return V;
2397 
2398   // fold (a+b) -> (a|b) iff a and b share no bits.
2399   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2400       DAG.haveNoCommonBitsSet(N0, N1))
2401     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2402 
2403   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2404   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2405     APInt C0 = N0->getConstantOperandAPInt(0);
2406     APInt C1 = N1->getConstantOperandAPInt(0);
2407     return DAG.getVScale(DL, VT, C0 + C1);
2408   }
2409 
2410   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2411   if ((N0.getOpcode() == ISD::ADD) &&
2412       (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2413       (N1.getOpcode() == ISD::VSCALE)) {
2414     auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2415     auto VS1 = N1->getConstantOperandAPInt(0);
2416     auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
2417     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2418   }
2419 
2420   return SDValue();
2421 }
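// A worked sketch of the two VSCALE folds above, with assumed multipliers:
//   add (vscale * 4), (vscale * 6)          --> vscale * 10
//   add (add a, (vscale * 4)), (vscale * 6) --> add a, (vscale * 10)
// so chains of scalable offsets collapse into a single VSCALE node.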
2422 
2423 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2424   unsigned Opcode = N->getOpcode();
2425   SDValue N0 = N->getOperand(0);
2426   SDValue N1 = N->getOperand(1);
2427   EVT VT = N0.getValueType();
2428   SDLoc DL(N);
2429 
2430   // fold vector ops
2431   if (VT.isVector()) {
2432     // TODO SimplifyVBinOp
2433 
2434     // fold (add_sat x, 0) -> x, vector edition
2435     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2436       return N0;
2437     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2438       return N1;
2439   }
2440 
2441   // fold (add_sat x, undef) -> -1
2442   if (N0.isUndef() || N1.isUndef())
2443     return DAG.getAllOnesConstant(DL, VT);
2444 
2445   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2446     // canonicalize constant to RHS
2447     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2448       return DAG.getNode(Opcode, DL, VT, N1, N0);
2449     // fold (add_sat c1, c2) -> c3
2450     return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2451   }
2452 
2453   // fold (add_sat x, 0) -> x
2454   if (isNullConstant(N1))
2455     return N0;
2456 
2457   // If it cannot overflow, transform into an add.
2458   if (Opcode == ISD::UADDSAT)
2459     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2460       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2461 
2462   return SDValue();
2463 }
2464 
2465 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2466   bool Masked = false;
2467 
2468   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2469   while (true) {
2470     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2471       V = V.getOperand(0);
2472       continue;
2473     }
2474 
2475     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2476       Masked = true;
2477       V = V.getOperand(0);
2478       continue;
2479     }
2480 
2481     break;
2482   }
2483 
2484   // If this is not a carry, return.
2485   if (V.getResNo() != 1)
2486     return SDValue();
2487 
2488   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2489       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2490     return SDValue();
2491 
2492   EVT VT = V.getNode()->getValueType(0);
2493   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2494     return SDValue();
2495 
2496   // If the result is masked, then no matter what kind of bool it is we can
2497   // return it. If it isn't, then we need to make sure the bool is known to be
2498   // either 0 or 1 and not some other value.
2499   if (Masked ||
2500       TLI.getBooleanContents(V.getValueType()) ==
2501           TargetLoweringBase::ZeroOrOneBooleanContent)
2502     return V;
2503 
2504   return SDValue();
2505 }
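// A worked sketch of getAsCarry: legalization may wrap a carry in
// conversions, e.g.
//   and (trunc (uaddo a, b):1), 1
// peels back to the uaddo's second result; having seen the AND mask, the
// value is known to be 0/1 regardless of the target's boolean contents.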
2506 
2507 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2508 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2509 /// the opcode and bypass the mask operation.
2510 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2511                                  SelectionDAG &DAG, const SDLoc &DL) {
2512   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2513     return SDValue();
2514 
2515   EVT VT = N0.getValueType();
2516   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2517     return SDValue();
2518 
2519   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2520   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2521   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2522 }
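// A worked sketch of foldAddSubMasked1, assuming X is known to be all sign
// bits (i.e. 0 or -1, as from AssertSext X, i1):
//   add N0, (and X, 1) --> sub N0, X
// since (and X, 1) is 0 when X == 0 and 1 when X == -1, i.e. exactly -X.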
2523 
2524 /// Helper for doing combines based on N0 and N1 being added to each other.
2525 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2526                                           SDNode *LocReference) {
2527   EVT VT = N0.getValueType();
2528   SDLoc DL(LocReference);
2529 
2530   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2531   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2532       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2533     return DAG.getNode(ISD::SUB, DL, VT, N0,
2534                        DAG.getNode(ISD::SHL, DL, VT,
2535                                    N1.getOperand(0).getOperand(1),
2536                                    N1.getOperand(1)));
2537 
2538   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2539     return V;
2540 
2541   // Look for:
2542   //   add (add x, 1), y
2543   // And if the target does not like this form then turn into:
2544   //   sub y, (xor x, -1)
2545   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2546       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2547     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2548                               DAG.getAllOnesConstant(DL, VT));
2549     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2550   }
2551 
2552   // Hoist one-use subtraction by non-opaque constant:
2553   //   (x - C) + y  ->  (x + y) - C
2554   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2555   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2556       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2557     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2558     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2559   }
2560   // Hoist one-use subtraction from non-opaque constant:
2561   //   (C - x) + y  ->  (y - x) + C
2562   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2563       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2564     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2565     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2566   }
2567 
2568   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2569   // rather than 'add 0/-1' (the zext should get folded).
2570   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2571   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2572       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2573       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2574     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2575     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2576   }
2577 
2578   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2579   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2580     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2581     if (TN->getVT() == MVT::i1) {
2582       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2583                                  DAG.getConstant(1, DL, VT));
2584       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2585     }
2586   }
2587 
2588   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2589   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2590       N1.getResNo() == 0)
2591     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2592                        N0, N1.getOperand(0), N1.getOperand(2));
2593 
2594   // (add X, Carry) -> (addcarry X, 0, Carry)
2595   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2596     if (SDValue Carry = getAsCarry(TLI, N1))
2597       return DAG.getNode(ISD::ADDCARRY, DL,
2598                          DAG.getVTList(VT, Carry.getValueType()), N0,
2599                          DAG.getConstant(0, DL, VT), Carry);
2600 
2601   return SDValue();
2602 }
2603 
2604 SDValue DAGCombiner::visitADDC(SDNode *N) {
2605   SDValue N0 = N->getOperand(0);
2606   SDValue N1 = N->getOperand(1);
2607   EVT VT = N0.getValueType();
2608   SDLoc DL(N);
2609 
2610   // If the flag result is dead, turn this into an ADD.
2611   if (!N->hasAnyUseOfValue(1))
2612     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2613                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2614 
2615   // canonicalize constant to RHS.
2616   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2617   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2618   if (N0C && !N1C)
2619     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2620 
2621   // fold (addc x, 0) -> x + no carry out
2622   if (isNullConstant(N1))
2623     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2624                                         DL, MVT::Glue));
2625 
2626   // If it cannot overflow, transform into an add.
2627   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2628     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2629                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2630 
2631   return SDValue();
2632 }
2633 
2634 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2635                            SelectionDAG &DAG, const TargetLowering &TLI) {
2636   EVT VT = V.getValueType();
2637 
2638   SDValue Cst;
2639   switch (TLI.getBooleanContents(VT)) {
2640   case TargetLowering::ZeroOrOneBooleanContent:
2641   case TargetLowering::UndefinedBooleanContent:
2642     Cst = DAG.getConstant(1, DL, VT);
2643     break;
2644   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2645     Cst = DAG.getAllOnesConstant(DL, VT);
2646     break;
2647   }
2648 
2649   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2650 }
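// E.g. with ZeroOrOneBooleanContent, (xor V, 1) maps 0 -> 1 and 1 -> 0;
// with ZeroOrNegativeOneBooleanContent, (xor V, -1) maps 0 -> -1 and
// -1 -> 0. In both encodings the XOR inverts the boolean.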
2651 
2652 /**
2653  * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2654  * then the flip also occurs if computing the inverse is the same cost.
2655  * This function returns an empty SDValue if it cannot flip the boolean
2656  * without increasing the cost of the computation. If you want to flip a boolean
2657  * no matter what, use flipBoolean.
2658  */
2659 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2660                                   const TargetLowering &TLI,
2661                                   bool Force) {
2662   if (Force && isa<ConstantSDNode>(V))
2663     return flipBoolean(V, SDLoc(V), DAG, TLI);
2664 
2665   if (V.getOpcode() != ISD::XOR)
2666     return SDValue();
2667 
2668   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2669   if (!Const)
2670     return SDValue();
2671 
2672   EVT VT = V.getValueType();
2673 
2674   bool IsFlip = false;
2675   switch(TLI.getBooleanContents(VT)) {
2676     case TargetLowering::ZeroOrOneBooleanContent:
2677       IsFlip = Const->isOne();
2678       break;
2679     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2680       IsFlip = Const->isAllOnesValue();
2681       break;
2682     case TargetLowering::UndefinedBooleanContent:
2683       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2684       break;
2685   }
2686 
2687   if (IsFlip)
2688     return V.getOperand(0);
2689   if (Force)
2690     return flipBoolean(V, SDLoc(V), DAG, TLI);
2691   return SDValue();
2692 }
2693 
2694 SDValue DAGCombiner::visitADDO(SDNode *N) {
2695   SDValue N0 = N->getOperand(0);
2696   SDValue N1 = N->getOperand(1);
2697   EVT VT = N0.getValueType();
2698   bool IsSigned = (ISD::SADDO == N->getOpcode());
2699 
2700   EVT CarryVT = N->getValueType(1);
2701   SDLoc DL(N);
2702 
2703   // If the flag result is dead, turn this into an ADD.
2704   if (!N->hasAnyUseOfValue(1))
2705     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2706                      DAG.getUNDEF(CarryVT));
2707 
2708   // canonicalize constant to RHS.
2709   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2710       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2711     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2712 
2713   // fold (addo x, 0) -> x + no carry out
2714   if (isNullOrNullSplat(N1))
2715     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2716 
2717   if (!IsSigned) {
2718     // If it cannot overflow, transform into an add.
2719     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2720       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2721                        DAG.getConstant(0, DL, CarryVT));
2722 
2723     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2724     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2725       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2726                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2727       return CombineTo(N, Sub,
2728                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2729     }
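    // E.g. for i8 with a == 0x2A: (xor a, -1) + 1 == 0xD5 + 1 == 0xD6, which
    // equals 0 - 0x2A. The uaddo carry is set only when a == 0 (0xFF + 1
    // wraps), whereas the usubo borrow is set for every a != 0, so the carry
    // result must be flipped.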
2730 
2731     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2732       return Combined;
2733 
2734     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2735       return Combined;
2736   }
2737 
2738   return SDValue();
2739 }
2740 
2741 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2742   EVT VT = N0.getValueType();
2743   if (VT.isVector())
2744     return SDValue();
2745 
2746   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2747   // If Y + 1 cannot overflow.
2748   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2749     SDValue Y = N1.getOperand(0);
2750     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2751     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2752       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2753                          N1.getOperand(2));
2754   }
2755 
2756   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2757   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2758     if (SDValue Carry = getAsCarry(TLI, N1))
2759       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2760                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2761 
2762   return SDValue();
2763 }
2764 
2765 SDValue DAGCombiner::visitADDE(SDNode *N) {
2766   SDValue N0 = N->getOperand(0);
2767   SDValue N1 = N->getOperand(1);
2768   SDValue CarryIn = N->getOperand(2);
2769 
2770   // canonicalize constant to RHS
2771   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2772   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2773   if (N0C && !N1C)
2774     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2775                        N1, N0, CarryIn);
2776 
2777   // fold (adde x, y, false) -> (addc x, y)
2778   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2779     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2780 
2781   return SDValue();
2782 }
2783 
2784 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2785   SDValue N0 = N->getOperand(0);
2786   SDValue N1 = N->getOperand(1);
2787   SDValue CarryIn = N->getOperand(2);
2788   SDLoc DL(N);
2789 
2790   // canonicalize constant to RHS
2791   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2792   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2793   if (N0C && !N1C)
2794     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2795 
2796   // fold (addcarry x, y, false) -> (uaddo x, y)
2797   if (isNullConstant(CarryIn)) {
2798     if (!LegalOperations ||
2799         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2800       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2801   }
2802 
2803   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2804   if (isNullConstant(N0) && isNullConstant(N1)) {
2805     EVT VT = N0.getValueType();
2806     EVT CarryVT = CarryIn.getValueType();
2807     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2808     AddToWorklist(CarryExt.getNode());
2809     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2810                                     DAG.getConstant(1, DL, VT)),
2811                      DAG.getConstant(0, DL, CarryVT));
2812   }
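  // E.g. (addcarry 0, 0, X) computes 0 + 0 + X, so the sum is just the
  // carry-in bit (hence the AND with 1 after the bool extension) and a
  // carry-out can never be produced.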
2813 
2814   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2815     return Combined;
2816 
2817   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2818     return Combined;
2819 
2820   return SDValue();
2821 }
2822 
2823 /**
2824  * If we are facing some sort of diamond carry propagation pattern, try to
2825  * break it up to generate something like:
2826  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2827  *
2828  * The end result is usually an increase in the number of operations required,
2829  * but because the carry is now linearized, other transforms can kick in and optimize the DAG.
2830  *
2831  * Patterns typically look something like
2832  *            (uaddo A, B)
2833  *             /       \
2834  *          Carry      Sum
2835  *            |          \
2836  *            | (addcarry *, 0, Z)
2837  *            |       /
2838  *             \   Carry
2839  *              |   /
2840  * (addcarry X, *, *)
2841  *
2842  * But numerous variations exist. Our goal is to identify A, B, X and Z and
2843  * produce a combine with a single path for carry propagation.
2844  */
2845 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2846                                       SDValue X, SDValue Carry0, SDValue Carry1,
2847                                       SDNode *N) {
2848   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2849     return SDValue();
2850   if (Carry1.getOpcode() != ISD::UADDO)
2851     return SDValue();
2852 
2853   SDValue Z;
2854 
2855   /**
2856    * First look for a suitable Z. It will present itself in the form of
2857    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true.
2858    */
2859   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2860       isNullConstant(Carry0.getOperand(1))) {
2861     Z = Carry0.getOperand(2);
2862   } else if (Carry0.getOpcode() == ISD::UADDO &&
2863              isOneConstant(Carry0.getOperand(1))) {
2864     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2865     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2866   } else {
2867     // We couldn't find a suitable Z.
2868     return SDValue();
2869   }
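  // E.g. (uaddo Y, 1) computes Y + 1, which is exactly
  // (addcarry Y, 0, true), so a constant true is used for Z in the second
  // case above.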
2870 
2872   auto cancelDiamond = [&](SDValue A,SDValue B) {
2873     SDLoc DL(N);
2874     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2875     Combiner.AddToWorklist(NewY.getNode());
2876     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2877                        DAG.getConstant(0, DL, X.getValueType()),
2878                        NewY.getValue(1));
2879   };
2880 
2881   /**
2882    *      (uaddo A, B)
2883    *           |
2884    *          Sum
2885    *           |
2886    * (addcarry *, 0, Z)
2887    */
2888   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2889     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2890   }
2891 
2892   /**
2893    * (addcarry A, 0, Z)
2894    *         |
2895    *        Sum
2896    *         |
2897    *  (uaddo *, B)
2898    */
2899   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2900     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2901   }
2902 
2903   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2904     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2905   }
2906 
2907   return SDValue();
2908 }
2909 
2910 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
2911 // match patterns like:
2912 //
2913 //          (uaddo A, B)            CarryIn
2914 //            |  \                     |
2915 //            |   \                    |
2916 //    PartialSum   PartialCarryOutX   /
2917 //            |        |             /
2918 //            |    ____|____________/
2919 //            |   /    |
2920 //     (uaddo *, *)    \________
2921 //       |  \                   \
2922 //       |   \                   |
2923 //       |    PartialCarryOutY   |
2924 //       |        \              |
2925 //       |         \            /
2926 //   AddCarrySum    |    ______/
2927 //                  |   /
2928 //   CarryOut = (or *, *)
2929 //
2930 // And generate ADDCARRY (or SUBCARRY) with two result values:
2931 //
2932 //    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
2933 //
2934 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
2935 // a single path for carry/borrow out propagation:
2936 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2937                                    const TargetLowering &TLI, SDValue Carry0,
2938                                    SDValue Carry1, SDNode *N) {
2939   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
2940     return SDValue();
2941   unsigned Opcode = Carry0.getOpcode();
2942   if (Opcode != Carry1.getOpcode())
2943     return SDValue();
2944   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
2945     return SDValue();
2946 
2947   // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
2948   // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
2949   // the above ASCII art.)
2950   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
2951       Carry1.getOperand(1) != Carry0.getValue(0))
2952     std::swap(Carry0, Carry1);
2953   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
2954       Carry1.getOperand(1) != Carry0.getValue(0))
2955     return SDValue();
2956 
2957   // The carry-in value must be on the right-hand side for subtraction.
2958   unsigned CarryInOperandNum =
2959       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
2960   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
2961     return SDValue();
2962   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
2963 
2964   unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
2965   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
2966     return SDValue();
2967 
2968   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
2969   // TODO: make getAsCarry() aware of how partial carries are merged.
2970   if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
2971     return SDValue();
2972   CarryIn = CarryIn.getOperand(0);
2973   if (CarryIn.getValueType() != MVT::i1)
2974     return SDValue();
2975 
2976   SDLoc DL(N);
2977   SDValue Merged =
2978       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
2979                   Carry0.getOperand(1), CarryIn);
2980 
2981   // Note that because we have proven that the result of the UADDO/USUBO
2982   // of A and B feeds into the UADDO/USUBO that consumes the carry/borrow
2983   // in, if the first UADDO/USUBO overflows, the second UADDO/USUBO
2984   // cannot. For example, consider 8-bit numbers where 0xFF is the
2985   // maximum value.
2986   //
2987   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
2988   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
2989   //
2990   // This is important because it means that OR and XOR can be used to merge
2991   // carry flags; and that AND can return a constant zero.
2992   //
2993   // TODO: match other operations that can merge flags (ADD, etc)
2994   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
2995   if (N->getOpcode() == ISD::AND)
2996     return DAG.getConstant(0, DL, MVT::i1);
2997   return Merged.getValue(1);
2998 }
2999 
3000 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3001                                        SDNode *N) {
3002   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3003   if (isBitwiseNot(N0))
3004     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3005       SDLoc DL(N);
3006       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3007                                 N0.getOperand(0), NotC);
3008       return CombineTo(N, Sub,
3009                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
3010     }
3011 
3012   // Iff the flag result is dead:
3013   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3014   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3015   // or the dependency between the instructions.
3016   if ((N0.getOpcode() == ISD::ADD ||
3017        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3018         N0.getValue(1) != CarryIn)) &&
3019       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3020     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3021                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3022 
3023   /**
3024    * When one of the addcarry arguments is itself a carry, we may be facing
3025    * a diamond carry propagation. In that case we try to transform the DAG
3026    * to ensure linear carry propagation, if possible.
3027    */
3028   if (auto Y = getAsCarry(TLI, N1)) {
3029     // Because both are carries, Y and Z can be swapped.
3030     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3031       return R;
3032     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3033       return R;
3034   }
3035 
3036   return SDValue();
3037 }
3038 
3039 // Since it may not be valid to emit a fold to zero for vector initializers,
3040 // check whether we can before folding.
3041 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3042                              SelectionDAG &DAG, bool LegalOperations) {
3043   if (!VT.isVector())
3044     return DAG.getConstant(0, DL, VT);
3045   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3046     return DAG.getConstant(0, DL, VT);
3047   return SDValue();
3048 }
3049 
3050 SDValue DAGCombiner::visitSUB(SDNode *N) {
3051   SDValue N0 = N->getOperand(0);
3052   SDValue N1 = N->getOperand(1);
3053   EVT VT = N0.getValueType();
3054   SDLoc DL(N);
3055 
3056   // fold vector ops
3057   if (VT.isVector()) {
3058     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3059       return FoldedVOp;
3060 
3061     // fold (sub x, 0) -> x, vector edition
3062     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3063       return N0;
3064   }
3065 
3066   // fold (sub x, x) -> 0
3067   // FIXME: Refactor this and xor and other similar operations together.
3068   if (N0 == N1)
3069     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3070 
3071   // fold (sub c1, c2) -> c3
3072   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3073     return C;
3074 
3075   if (SDValue NewSel = foldBinOpIntoSelect(N))
3076     return NewSel;
3077 
3078   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3079 
3080   // fold (sub x, c) -> (add x, -c)
3081   if (N1C) {
3082     return DAG.getNode(ISD::ADD, DL, VT, N0,
3083                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3084   }
3085 
3086   if (isNullOrNullSplat(N0)) {
3087     unsigned BitWidth = VT.getScalarSizeInBits();
3088     // Right-shifting everything out but the sign bit followed by negation is
3089     // the same as flipping arithmetic/logical shift type without the negation:
3090     // -(X >>u 31) -> (X >>s 31)
3091     // -(X >>s 31) -> (X >>u 31)
3092     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3093       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3094       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3095         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3096         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3097           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3098       }
3099     }
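    // E.g. for i32: (X >>u 31) is 0 or 1 and (X >>s 31) is 0 or -1, so
    // negating either form yields the other: -(X >>u 31) == (X >>s 31) and
    // -(X >>s 31) == (X >>u 31).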
3100 
3101     // 0 - X --> 0 if the sub is NUW.
3102     if (N->getFlags().hasNoUnsignedWrap())
3103       return N0;
3104 
3105     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3106       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3107       // N1 must be 0 because negating the minimum signed value is undefined.
3108       if (N->getFlags().hasNoSignedWrap())
3109         return N0;
3110 
3111       // 0 - X --> X if X is 0 or the minimum signed value.
3112       return N1;
3113     }
3114   }
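  // E.g. for i8, MaskedValueIsZero(N1, ~0x80) above means N1 is 0x00 or 0x80
  // (INT8_MIN); both values are fixed points of two's complement negation
  // (0 - 0x80 wraps to 0x80), so 0 - N1 == N1 either way.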
3115 
3116   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3117   if (isAllOnesOrAllOnesSplat(N0))
3118     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3119 
3120   // fold (A - (0-B)) -> A+B
3121   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3122     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3123 
3124   // fold A-(A-B) -> B
3125   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3126     return N1.getOperand(1);
3127 
3128   // fold (A+B)-A -> B
3129   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3130     return N0.getOperand(1);
3131 
3132   // fold (A+B)-B -> A
3133   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3134     return N0.getOperand(0);
3135 
3136   // fold (A+C1)-C2 -> A+(C1-C2)
3137   if (N0.getOpcode() == ISD::ADD &&
3138       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3139       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3140     SDValue NewC =
3141         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3142     assert(NewC && "Constant folding failed");
3143     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3144   }
3145 
3146   // fold C2-(A+C1) -> (C2-C1)-A
3147   if (N1.getOpcode() == ISD::ADD) {
3148     SDValue N11 = N1.getOperand(1);
3149     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3150         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3151       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3152       assert(NewC && "Constant folding failed");
3153       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3154     }
3155   }
3156 
3157   // fold (A-C1)-C2 -> A-(C1+C2)
3158   if (N0.getOpcode() == ISD::SUB &&
3159       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3160       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3161     SDValue NewC =
3162         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3163     assert(NewC && "Constant folding failed");
3164     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3165   }
3166 
3167   // fold (c1-A)-c2 -> (c1-c2)-A
3168   if (N0.getOpcode() == ISD::SUB &&
3169       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3170       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3171     SDValue NewC =
3172         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3173     assert(NewC && "Constant folding failed");
3174     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3175   }
3176 
3177   // fold ((A+(B+or-C))-B) -> A+or-C
3178   if (N0.getOpcode() == ISD::ADD &&
3179       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3180        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3181       N0.getOperand(1).getOperand(0) == N1)
3182     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3183                        N0.getOperand(1).getOperand(1));
3184 
3185   // fold ((A+(C+B))-B) -> A+C
3186   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3187       N0.getOperand(1).getOperand(1) == N1)
3188     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3189                        N0.getOperand(1).getOperand(0));
3190 
3191   // fold ((A-(B-C))-C) -> A-B
3192   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3193       N0.getOperand(1).getOperand(1) == N1)
3194     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3195                        N0.getOperand(1).getOperand(0));
3196 
3197   // fold (A-(B-C)) -> A+(C-B)
3198   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3199     return DAG.getNode(ISD::ADD, DL, VT, N0,
3200                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3201                                    N1.getOperand(0)));
3202 
3203   // A - (A & B)  ->  A & (~B)
3204   if (N1.getOpcode() == ISD::AND) {
3205     SDValue A = N1.getOperand(0);
3206     SDValue B = N1.getOperand(1);
3207     if (A != N0)
3208       std::swap(A, B);
3209     if (A == N0 &&
3210         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3211       SDValue InvB =
3212           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3213       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3214     }
3215   }
3216 
3217   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3218   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3219     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3220         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3221       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3222                                 N1.getOperand(0).getOperand(1),
3223                                 N1.getOperand(1));
3224       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3225     }
3226     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3227         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3228       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3229                                 N1.getOperand(0),
3230                                 N1.getOperand(1).getOperand(1));
3231       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3232     }
3233   }
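  // E.g. X - ((0 - Y) * Z) == X - (-(Y * Z)) == X + Y * Z; the two cases
  // above handle the negated factor appearing as either multiplicand.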
3234 
3235   // If either operand of a sub is undef, the result is undef
3236   if (N0.isUndef())
3237     return N0;
3238   if (N1.isUndef())
3239     return N1;
3240 
3241   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3242     return V;
3243 
3244   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3245     return V;
3246 
3247   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3248     return V;
3249 
3250   // (x - y) - 1  ->  add (xor y, -1), x
3251   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3252     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3253                               DAG.getAllOnesConstant(DL, VT));
3254     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3255   }
3256 
3257   // Look for:
3258   //   sub y, (xor x, -1)
3259   // And if the target does not like this form then turn into:
3260   //   add (add x, y), 1
3261   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3262     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3263     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3264   }
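  // E.g. y - (xor x, -1) == y - (-x - 1) == (x + y) + 1, since in two's
  // complement ~x == -x - 1.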
3265 
3266   // Hoist one-use addition by non-opaque constant:
3267   //   (x + C) - y  ->  (x - y) + C
3268   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3269       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3270     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3271     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3272   }
3273   // y - (x + C)  ->  (y - x) - C
3274   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3275       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3276     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3277     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3278   }
3279   // (x - C) - y  ->  (x - y) - C
3280   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3281   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3282       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3283     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3284     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3285   }
3286   // (C - x) - y  ->  C - (x + y)
3287   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3288       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3289     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3290     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3291   }
3292 
3293   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3294   // rather than 'sub 0/1' (the sext should get folded).
3295   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3296   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3297       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3298       TLI.getBooleanContents(VT) ==
3299           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3300     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3301     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3302   }
3303 
3304   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3305   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3306     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3307       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3308       SDValue S0 = N1.getOperand(0);
3309       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3310         unsigned OpSizeInBits = VT.getScalarSizeInBits();
3311         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3312           if (C->getAPIntValue() == (OpSizeInBits - 1))
3313             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3314       }
3315     }
3316   }
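  // E.g. for i32 with Y = X >>s 31 (0 when X >= 0, -1 when X < 0): if X < 0,
  // (X ^ -1) - (-1) == ~X + 1 == -X; if X >= 0, (X ^ 0) - 0 == X. Either way
  // the result is abs(X).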
3317 
3318   // If the relocation model supports it, consider symbol offsets.
3319   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3320     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3321       // fold (sub Sym, c) -> Sym-c
3322       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3323         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3324                                     GA->getOffset() -
3325                                         (uint64_t)N1C->getSExtValue());
3326       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3327       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3328         if (GA->getGlobal() == GB->getGlobal())
3329           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3330                                  DL, VT);
3331     }
3332 
3333   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3334   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3335     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3336     if (TN->getVT() == MVT::i1) {
3337       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3338                                  DAG.getConstant(1, DL, VT));
3339       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3340     }
3341   }
3342 
3343   // canonicalize (sub X, (vscale * C)) to (add X,  (vscale * -C))
3344   if (N1.getOpcode() == ISD::VSCALE) {
3345     APInt IntVal = N1.getConstantOperandAPInt(0);
3346     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3347   }
3348 
3349   // Prefer an add for more folding potential and possibly better codegen:
3350   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3351   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3352     SDValue ShAmt = N1.getOperand(1);
3353     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3354     if (ShAmtC &&
3355         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3356       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3357       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3358     }
3359   }
3360 
3361   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3362     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3363     if (SDValue Carry = getAsCarry(TLI, N0)) {
3364       SDValue X = N1;
3365       SDValue Zero = DAG.getConstant(0, DL, VT);
3366       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3367       return DAG.getNode(ISD::ADDCARRY, DL,
3368                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3369                          Carry);
3370     }
3371   }
3372 
3373   return SDValue();
3374 }
3375 
3376 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3377   SDValue N0 = N->getOperand(0);
3378   SDValue N1 = N->getOperand(1);
3379   EVT VT = N0.getValueType();
3380   SDLoc DL(N);
3381 
3382   // fold vector ops
3383   if (VT.isVector()) {
3384     // TODO SimplifyVBinOp
3385 
3386     // fold (sub_sat x, 0) -> x, vector edition
3387     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3388       return N0;
3389   }
3390 
3391   // fold (sub_sat x, undef) -> 0
3392   if (N0.isUndef() || N1.isUndef())
3393     return DAG.getConstant(0, DL, VT);
3394 
3395   // fold (sub_sat x, x) -> 0
3396   if (N0 == N1)
3397     return DAG.getConstant(0, DL, VT);
3398 
3399   // fold (sub_sat c1, c2) -> c3
3400   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3401     return C;
3402 
3403   // fold (sub_sat x, 0) -> x
3404   if (isNullConstant(N1))
3405     return N0;
3406 
3407   return SDValue();
3408 }
3409 
3410 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3411   SDValue N0 = N->getOperand(0);
3412   SDValue N1 = N->getOperand(1);
3413   EVT VT = N0.getValueType();
3414   SDLoc DL(N);
3415 
3416   // If the flag result is dead, turn this into a SUB.
3417   if (!N->hasAnyUseOfValue(1))
3418     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3419                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3420 
3421   // fold (subc x, x) -> 0 + no borrow
3422   if (N0 == N1)
3423     return CombineTo(N, DAG.getConstant(0, DL, VT),
3424                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3425 
3426   // fold (subc x, 0) -> x + no borrow
3427   if (isNullConstant(N1))
3428     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3429 
3430   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3431   if (isAllOnesConstant(N0))
3432     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3433                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3434 
3435   return SDValue();
3436 }
3437 
3438 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3439   SDValue N0 = N->getOperand(0);
3440   SDValue N1 = N->getOperand(1);
3441   EVT VT = N0.getValueType();
3442   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3443 
3444   EVT CarryVT = N->getValueType(1);
3445   SDLoc DL(N);
3446 
3447   // If the flag result is dead, turn this into a SUB.
3448   if (!N->hasAnyUseOfValue(1))
3449     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3450                      DAG.getUNDEF(CarryVT));
3451 
3452   // fold (subo x, x) -> 0 + no borrow
3453   if (N0 == N1)
3454     return CombineTo(N, DAG.getConstant(0, DL, VT),
3455                      DAG.getConstant(0, DL, CarryVT));
3456 
3457   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3458 
3459   // fold (subo x, c) -> (addo x, -c)
3460   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3461     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3462                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3463   }
3464 
3465   // fold (subo x, 0) -> x + no borrow
3466   if (isNullOrNullSplat(N1))
3467     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3468 
3469   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3470   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3471     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3472                      DAG.getConstant(0, DL, CarryVT));
3473 
3474   return SDValue();
3475 }
3476 
3477 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3478   SDValue N0 = N->getOperand(0);
3479   SDValue N1 = N->getOperand(1);
3480   SDValue CarryIn = N->getOperand(2);
3481 
3482   // fold (sube x, y, false) -> (subc x, y)
3483   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3484     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3485 
3486   return SDValue();
3487 }
3488 
3489 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3490   SDValue N0 = N->getOperand(0);
3491   SDValue N1 = N->getOperand(1);
3492   SDValue CarryIn = N->getOperand(2);
3493 
3494   // fold (subcarry x, y, false) -> (usubo x, y)
3495   if (isNullConstant(CarryIn)) {
3496     if (!LegalOperations ||
3497         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3498       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3499   }
3500 
3501   return SDValue();
3502 }
3503 
3504 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3505 // UMULFIXSAT here.
3506 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3507   SDValue N0 = N->getOperand(0);
3508   SDValue N1 = N->getOperand(1);
3509   SDValue Scale = N->getOperand(2);
3510   EVT VT = N0.getValueType();
3511 
3512   // fold (mulfix x, undef, scale) -> 0
3513   if (N0.isUndef() || N1.isUndef())
3514     return DAG.getConstant(0, SDLoc(N), VT);
3515 
3516   // Canonicalize constant to RHS (vector doesn't have to splat)
3517   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3518      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3519     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3520 
3521   // fold (mulfix x, 0, scale) -> 0
3522   if (isNullConstant(N1))
3523     return DAG.getConstant(0, SDLoc(N), VT);
3524 
3525   return SDValue();
3526 }
3527 
3528 SDValue DAGCombiner::visitMUL(SDNode *N) {
3529   SDValue N0 = N->getOperand(0);
3530   SDValue N1 = N->getOperand(1);
3531   EVT VT = N0.getValueType();
3532 
3533   // fold (mul x, undef) -> 0
3534   if (N0.isUndef() || N1.isUndef())
3535     return DAG.getConstant(0, SDLoc(N), VT);
3536 
3537   bool N1IsConst = false;
3538   bool N1IsOpaqueConst = false;
3539   APInt ConstValue1;
3540 
3541   // fold vector ops
3542   if (VT.isVector()) {
3543     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3544       return FoldedVOp;
3545 
3546     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3547     assert((!N1IsConst ||
3548             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3549            "Splat APInt should be element width");
3550   } else {
3551     N1IsConst = isa<ConstantSDNode>(N1);
3552     if (N1IsConst) {
3553       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3554       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3555     }
3556   }
3557 
3558   // fold (mul c1, c2) -> c1*c2
3559   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3560     return C;
3561 
3562   // canonicalize constant to RHS (vector doesn't have to splat)
3563   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3564      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3565     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3566 
3567   // fold (mul x, 0) -> 0
3568   if (N1IsConst && ConstValue1.isNullValue())
3569     return N1;
3570 
3571   // fold (mul x, 1) -> x
3572   if (N1IsConst && ConstValue1.isOneValue())
3573     return N0;
3574 
3575   if (SDValue NewSel = foldBinOpIntoSelect(N))
3576     return NewSel;
3577 
3578   // fold (mul x, -1) -> 0-x
3579   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3580     SDLoc DL(N);
3581     return DAG.getNode(ISD::SUB, DL, VT,
3582                        DAG.getConstant(0, DL, VT), N0);
3583   }
3584 
3585   // fold (mul x, (1 << c)) -> x << c
3586   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3587       DAG.isKnownToBeAPowerOfTwo(N1) &&
3588       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3589     SDLoc DL(N);
3590     SDValue LogBase2 = BuildLogBase2(N1, DL);
3591     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3592     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3593     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3594   }
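  // E.g. x * 16 becomes x << 4: BuildLogBase2 extracts the shift amount (4)
  // from the power-of-two constant, and the zext/trunc adapts it to the
  // target's shift-amount type.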
3595 
3596   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3597   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3598     unsigned Log2Val = (-ConstValue1).logBase2();
3599     SDLoc DL(N);
3600     // FIXME: If the input is something that is easily negated (e.g. a
3601     // single-use add), we should put the negate there.
3602     return DAG.getNode(ISD::SUB, DL, VT,
3603                        DAG.getConstant(0, DL, VT),
3604                        DAG.getNode(ISD::SHL, DL, VT, N0,
3605                             DAG.getConstant(Log2Val, DL,
3606                                       getShiftAmountTy(N0.getValueType()))));
3607   }
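  // E.g. x * -8 becomes 0 - (x << 3), since x * -8 == -(x * 8) == -(x << 3).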
3608 
3609   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3610   // mul x, (2^N + 1) --> add (shl x, N), x
3611   // mul x, (2^N - 1) --> sub (shl x, N), x
3612   // Examples: x * 33 --> (x << 5) + x
3613   //           x * 15 --> (x << 4) - x
3614   //           x * -33 --> -((x << 5) + x)
3615   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3616   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3617     // TODO: We could handle more general decomposition of any constant by
3618     //       having the target set a limit on number of ops and making a
3619     //       callback to determine that sequence (similar to sqrt expansion).
3620     unsigned MathOp = ISD::DELETED_NODE;
3621     APInt MulC = ConstValue1.abs();
3622     if ((MulC - 1).isPowerOf2())
3623       MathOp = ISD::ADD;
3624     else if ((MulC + 1).isPowerOf2())
3625       MathOp = ISD::SUB;
3626 
3627     if (MathOp != ISD::DELETED_NODE) {
3628       unsigned ShAmt =
3629           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3630       assert(ShAmt < VT.getScalarSizeInBits() &&
3631              "multiply-by-constant generated out of bounds shift");
3632       SDLoc DL(N);
3633       SDValue Shl =
3634           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3635       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3636       if (ConstValue1.isNegative())
3637         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3638       return R;
3639     }
3640   }
3641 
3642   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3643   if (N0.getOpcode() == ISD::SHL &&
3644       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3645       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3646     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3647     if (isConstantOrConstantVector(C3))
3648       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3649   }
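  // E.g. (x << 3) * 5 becomes x * 40, i.e. x * (5 << 3); the fold only fires
  // if the shifted multiplier itself folds to a constant.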
3650 
3651   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3652   // use.
3653   {
3654     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3655 
3656     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3657     if (N0.getOpcode() == ISD::SHL &&
3658         isConstantOrConstantVector(N0.getOperand(1)) &&
3659         N0.getNode()->hasOneUse()) {
3660       Sh = N0; Y = N1;
3661     } else if (N1.getOpcode() == ISD::SHL &&
3662                isConstantOrConstantVector(N1.getOperand(1)) &&
3663                N1.getNode()->hasOneUse()) {
3664       Sh = N1; Y = N0;
3665     }
3666 
3667     if (Sh.getNode()) {
3668       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3669       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3670     }
3671   }
3672 
3673   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3674   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3675       N0.getOpcode() == ISD::ADD &&
3676       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3677       isMulAddWithConstProfitable(N, N0, N1))
3678       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3679                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3680                                      N0.getOperand(0), N1),
3681                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3682                                      N0.getOperand(1), N1));
3683 
3684   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3685   if (N0.getOpcode() == ISD::VSCALE)
3686     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3687       APInt C0 = N0.getConstantOperandAPInt(0);
3688       APInt C1 = NC1->getAPIntValue();
3689       return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3690     }
3691 
3692   // reassociate mul
3693   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3694     return RMUL;
3695 
3696   return SDValue();
3697 }
3698 
3699 /// Return true if a divmod libcall is available.
3700 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3701                                      const TargetLowering &TLI) {
3702   RTLIB::Libcall LC;
3703   EVT NodeType = Node->getValueType(0);
3704   if (!NodeType.isSimple())
3705     return false;
3706   switch (NodeType.getSimpleVT().SimpleTy) {
3707   default: return false; // No libcall for vector types.
3708   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3709   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3710   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3711   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3712   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3713   }
3714 
3715   return TLI.getLibcallName(LC) != nullptr;
3716 }
3717 
3718 /// Issue divrem if both quotient and remainder are needed.
3719 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3720   if (Node->use_empty())
3721     return SDValue(); // This is a dead node, leave it alone.
3722 
3723   unsigned Opcode = Node->getOpcode();
3724   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3725   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3726 
3727   // DivMod libcalls can still work on non-legal types, because the node can
3728   // be lowered to a libcall rather than expanded inline.
3728   EVT VT = Node->getValueType(0);
3729   if (VT.isVector() || !VT.isInteger())
3730     return SDValue();
3731 
3732   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3733     return SDValue();
3734 
3735   // If DIVREM is going to get expanded into a libcall,
3736   // but there is no libcall available, then don't combine.
3737   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3738       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3739     return SDValue();
3740 
3741   // If div is legal, it's better to do the normal expansion
3742   unsigned OtherOpcode = 0;
3743   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3744     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3745     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3746       return SDValue();
3747   } else {
3748     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3749     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3750       return SDValue();
3751   }
3752 
3753   SDValue Op0 = Node->getOperand(0);
3754   SDValue Op1 = Node->getOperand(1);
3755   SDValue combined;
3756   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3757          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3758     SDNode *User = *UI;
3759     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3760         User->use_empty())
3761       continue;
3762     // Convert the other matching node(s), too;
3763     // otherwise, the DIVREM may get target-legalized into something
3764     // target-specific that we won't be able to recognize.
3765     unsigned UserOpc = User->getOpcode();
3766     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3767         User->getOperand(0) == Op0 &&
3768         User->getOperand(1) == Op1) {
3769       if (!combined) {
3770         if (UserOpc == OtherOpcode) {
3771           SDVTList VTs = DAG.getVTList(VT, VT);
3772           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3773         } else if (UserOpc == DivRemOpc) {
3774           combined = SDValue(User, 0);
3775         } else {
3776           assert(UserOpc == Opcode);
3777           continue;
3778         }
3779       }
3780       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3781         CombineTo(User, combined);
3782       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3783         CombineTo(User, combined.getValue(1));
3784     }
3785   }
3786   return combined;
3787 }
3788 
3789 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3790   SDValue N0 = N->getOperand(0);
3791   SDValue N1 = N->getOperand(1);
3792   EVT VT = N->getValueType(0);
3793   SDLoc DL(N);
3794 
3795   unsigned Opc = N->getOpcode();
3796   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3797   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3798 
3799   // X / undef -> undef
3800   // X % undef -> undef
3801   // X / 0 -> undef
3802   // X % 0 -> undef
3803   // NOTE: This includes vectors where any divisor element is zero/undef.
3804   if (DAG.isUndef(Opc, {N0, N1}))
3805     return DAG.getUNDEF(VT);
3806 
3807   // undef / X -> 0
3808   // undef % X -> 0
3809   if (N0.isUndef())
3810     return DAG.getConstant(0, DL, VT);
3811 
3812   // 0 / X -> 0
3813   // 0 % X -> 0
3814   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3815   if (N0C && N0C->isNullValue())
3816     return N0;
3817 
3818   // X / X -> 1
3819   // X % X -> 0
3820   if (N0 == N1)
3821     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3822 
3823   // X / 1 -> X
3824   // X % 1 -> 0
3825   // If this is a boolean op (single-bit element type), we can't have
3826   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3827   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3828   // it's a 1.
3829   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3830     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3831 
3832   return SDValue();
3833 }
3834 
3835 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3836   SDValue N0 = N->getOperand(0);
3837   SDValue N1 = N->getOperand(1);
3838   EVT VT = N->getValueType(0);
3839   EVT CCVT = getSetCCResultType(VT);
3840 
3841   // fold vector ops
3842   if (VT.isVector())
3843     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3844       return FoldedVOp;
3845 
3846   SDLoc DL(N);
3847 
3848   // fold (sdiv c1, c2) -> c1/c2
3849   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3850   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
3851     return C;
3852 
3853   // fold (sdiv X, -1) -> 0-X
3854   if (N1C && N1C->isAllOnesValue())
3855     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3856 
3857   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3858   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3859     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3860                          DAG.getConstant(1, DL, VT),
3861                          DAG.getConstant(0, DL, VT));
3862 
3863   if (SDValue V = simplifyDivRem(N, DAG))
3864     return V;
3865 
3866   if (SDValue NewSel = foldBinOpIntoSelect(N))
3867     return NewSel;
3868 
3869   // If we know the sign bits of both operands are zero, strength reduce to a
3870   // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
3871   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3872     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3873 
3874   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3875     // If the corresponding remainder node exists, update its users with
3876     // (Dividend - (Quotient * Divisor)).
3877     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3878                                               { N0, N1 })) {
3879       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3880       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3881       AddToWorklist(Mul.getNode());
3882       AddToWorklist(Sub.getNode());
3883       CombineTo(RemNode, Sub);
3884     }
3885     return V;
3886   }
3887 
3888   // sdiv, srem -> sdivrem
3889   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3890   // true.  Otherwise, we break the simplification logic in visitREM().
3891   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3892   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3893     if (SDValue DivRem = useDivRem(N))
3894         return DivRem;
3895 
3896   return SDValue();
3897 }
3898 
3899 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3900   SDLoc DL(N);
3901   EVT VT = N->getValueType(0);
3902   EVT CCVT = getSetCCResultType(VT);
3903   unsigned BitWidth = VT.getScalarSizeInBits();
3904 
3905   // Helper for determining whether a value is a power-2 constant scalar or a
3906   // vector of such elements.
3907   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3908     if (C->isNullValue() || C->isOpaque())
3909       return false;
3910     if (C->getAPIntValue().isPowerOf2())
3911       return true;
3912     if ((-C->getAPIntValue()).isPowerOf2())
3913       return true;
3914     return false;
3915   };
3916 
3917   // fold (sdiv X, pow2) -> simple ops after legalize
3918   // FIXME: We check for the exact bit here because the generic lowering gives
3919   // better results in that case. The target-specific lowering should learn how
3920   // to handle exact sdivs efficiently.
3921   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3922     // Target-specific implementation of sdiv x, pow2.
3923     if (SDValue Res = BuildSDIVPow2(N))
3924       return Res;
3925 
3926     // Create constants that are functions of the shift amount value.
3927     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3928     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3929     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3930     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3931     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3932     if (!isConstantOrConstantVector(Inexact))
3933       return SDValue();
3934 
3935     // Splat the sign bit into the register
3936     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3937                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3938     AddToWorklist(Sign.getNode());
3939 
3940     // Add (N0 < 0) ? abs(N1) - 1 : 0 so that the final shift rounds toward zero.
3941     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3942     AddToWorklist(Srl.getNode());
3943     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3944     AddToWorklist(Add.getNode());
3945     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3946     AddToWorklist(Sra.getNode());
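    // E.g. for i32 with N1 == 8: C1 == 3 and Inexact == 29, so for negative
    // N0 the Srl term contributes (1 << 3) - 1 == 7, and Sra computes
    // (N0 + 7) >> 3, i.e. a signed division by 8 rounded toward zero.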
3947 
3948     // Special case: (sdiv X, 1) -> X
3949     // Special case: (sdiv X, -1) -> 0-X
3950     SDValue One = DAG.getConstant(1, DL, VT);
3951     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3952     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3953     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3954     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3955     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3956 
3957     // If dividing by a positive value, we're done. Otherwise, the result must
3958     // be negated.
3959     SDValue Zero = DAG.getConstant(0, DL, VT);
3960     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3961 
3962     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3963     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3964     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3965     return Res;
3966   }
3967 
3968   // If integer divide is expensive and we satisfy the requirements, emit an
3969   // alternate sequence.  Targets may check function attributes for size/speed
3970   // trade-offs.
3971   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3972   if (isConstantOrConstantVector(N1) &&
3973       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3974     if (SDValue Op = BuildSDIV(N))
3975       return Op;
3976 
3977   return SDValue();
3978 }
3979 
3980 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3981   SDValue N0 = N->getOperand(0);
3982   SDValue N1 = N->getOperand(1);
3983   EVT VT = N->getValueType(0);
3984   EVT CCVT = getSetCCResultType(VT);
3985 
3986   // fold vector ops
3987   if (VT.isVector())
3988     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3989       return FoldedVOp;
3990 
3991   SDLoc DL(N);
3992 
3993   // fold (udiv c1, c2) -> c1/c2
3994   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3995   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
3996     return C;
3997 
3998   // fold (udiv X, -1) -> select(X == -1, 1, 0)
3999   if (N1C && N1C->getAPIntValue().isAllOnesValue())
4000     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4001                          DAG.getConstant(1, DL, VT),
4002                          DAG.getConstant(0, DL, VT));
4003 
4004   if (SDValue V = simplifyDivRem(N, DAG))
4005     return V;
4006 
4007   if (SDValue NewSel = foldBinOpIntoSelect(N))
4008     return NewSel;
4009 
4010   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4011     // If the corresponding remainder node exists, update its users with
4012     // (Dividend - (Quotient * Divisor)).
4013     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4014                                               { N0, N1 })) {
4015       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4016       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4017       AddToWorklist(Mul.getNode());
4018       AddToWorklist(Sub.getNode());
4019       CombineTo(RemNode, Sub);
4020     }
4021     return V;
4022   }
4023 
4024   // udiv, urem -> udivrem
4025   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4026   // true.  Otherwise, we break the simplification logic in visitREM().
4027   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4028   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4029     if (SDValue DivRem = useDivRem(N))
4030         return DivRem;
4031 
4032   return SDValue();
4033 }
4034 
4035 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4036   SDLoc DL(N);
4037   EVT VT = N->getValueType(0);
4038 
4039   // fold (udiv x, (1 << c)) -> x >>u c
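       // e.g. (udiv x, 16) becomes (srl x, 4).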
4040   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4041       DAG.isKnownToBeAPowerOfTwo(N1)) {
4042     SDValue LogBase2 = BuildLogBase2(N1, DL);
4043     AddToWorklist(LogBase2.getNode());
4044 
4045     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4046     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4047     AddToWorklist(Trunc.getNode());
4048     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4049   }
4050 
4051   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
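       // e.g. dividing by (4 << y) is the same as shifting right by (2 + y).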
4052   if (N1.getOpcode() == ISD::SHL) {
4053     SDValue N10 = N1.getOperand(0);
4054     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4055         DAG.isKnownToBeAPowerOfTwo(N10)) {
4056       SDValue LogBase2 = BuildLogBase2(N10, DL);
4057       AddToWorklist(LogBase2.getNode());
4058 
4059       EVT ADDVT = N1.getOperand(1).getValueType();
4060       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4061       AddToWorklist(Trunc.getNode());
4062       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4063       AddToWorklist(Add.getNode());
4064       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4065     }
4066   }
4067 
4068   // fold (udiv x, c) -> alternate
4069   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4070   if (isConstantOrConstantVector(N1) &&
4071       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4072     if (SDValue Op = BuildUDIV(N))
4073       return Op;
4074 
4075   return SDValue();
4076 }
4077 
4078 // handles ISD::SREM and ISD::UREM
4079 SDValue DAGCombiner::visitREM(SDNode *N) {
4080   unsigned Opcode = N->getOpcode();
4081   SDValue N0 = N->getOperand(0);
4082   SDValue N1 = N->getOperand(1);
4083   EVT VT = N->getValueType(0);
4084   EVT CCVT = getSetCCResultType(VT);
4085 
4086   bool isSigned = (Opcode == ISD::SREM);
4087   SDLoc DL(N);
4088 
4089   // fold (rem c1, c2) -> c1%c2
4090   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4091   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4092     return C;
4093 
4094   // fold (urem X, -1) -> select(X == -1, 0, X)
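       // (Any unsigned X below the maximum value is unchanged by the remainder;
       // only X == -1 divides evenly and leaves 0.)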
4095   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4096     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4097                          DAG.getConstant(0, DL, VT), N0);
4098 
4099   if (SDValue V = simplifyDivRem(N, DAG))
4100     return V;
4101 
4102   if (SDValue NewSel = foldBinOpIntoSelect(N))
4103     return NewSel;
4104 
4105   if (isSigned) {
4106     // If we know the sign bits of both operands are zero, strength reduce to a
4107     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4108     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4109       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4110   } else {
4111     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4112     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4113       // fold (urem x, pow2) -> (and x, pow2-1)
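           // e.g. (urem x, 8) becomes (and x, 7).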
4114       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4115       AddToWorklist(Add.getNode());
4116       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4117     }
4118     if (N1.getOpcode() == ISD::SHL &&
4119         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4120       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4121       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4122       AddToWorklist(Add.getNode());
4123       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4124     }
4125   }
4126 
4127   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4128 
4129   // If X/C can be simplified by the division-by-constant logic, lower
4130   // X%C to the equivalent of X-X/C*C.
4131   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4132   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4133   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4134   // combine will not return a DIVREM.  Regardless, checking cheapness here
4135   // makes sense since the simplification results in fatter code.
4136   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4137     SDValue OptimizedDiv =
4138         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4139     if (OptimizedDiv.getNode()) {
4140       // If the equivalent Div node also exists, update its users.
4141       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4142       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4143                                                 { N0, N1 }))
4144         CombineTo(DivNode, OptimizedDiv);
4145       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4146       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4147       AddToWorklist(OptimizedDiv.getNode());
4148       AddToWorklist(Mul.getNode());
4149       return Sub;
4150     }
4151   }
4152 
4153   // sdiv, srem -> sdivrem
4154   if (SDValue DivRem = useDivRem(N))
4155     return DivRem.getValue(1);
4156 
4157   return SDValue();
4158 }
4159 
4160 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4161   SDValue N0 = N->getOperand(0);
4162   SDValue N1 = N->getOperand(1);
4163   EVT VT = N->getValueType(0);
4164   SDLoc DL(N);
4165 
4166   if (VT.isVector()) {
4167     // fold (mulhs x, 0) -> 0
4168     // do not return N0/N1, because undef node may exist.
4169     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4170         ISD::isBuildVectorAllZeros(N1.getNode()))
4171       return DAG.getConstant(0, DL, VT);
4172   }
4173 
4174   // fold (mulhs x, 0) -> 0
4175   if (isNullConstant(N1))
4176     return N1;
4177   // fold (mulhs x, 1) -> (sra x, size(x)-1)
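       // (The high half of the sign-extended product x * 1 is just copies of
       // the sign bit of x.)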
4178   if (isOneConstant(N1))
4179     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4180                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4181                                        getShiftAmountTy(N0.getValueType())));
4182 
4183   // fold (mulhs x, undef) -> 0
4184   if (N0.isUndef() || N1.isUndef())
4185     return DAG.getConstant(0, DL, VT);
4186 
4187   // If the type twice as wide is legal, transform the mulhs to a wider multiply
4188   // plus a shift.
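       // e.g. for i16: trunc (srl (mul (sext_i32 x), (sext_i32 y)), 16).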
4189   if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
4190     MVT Simple = VT.getSimpleVT();
4191     unsigned SimpleSize = Simple.getSizeInBits();
4192     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4193     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4194       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4195       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4196       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4197       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4198             DAG.getConstant(SimpleSize, DL,
4199                             getShiftAmountTy(N1.getValueType())));
4200       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4201     }
4202   }
4203 
4204   return SDValue();
4205 }
4206 
4207 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4208   SDValue N0 = N->getOperand(0);
4209   SDValue N1 = N->getOperand(1);
4210   EVT VT = N->getValueType(0);
4211   SDLoc DL(N);
4212 
4213   if (VT.isVector()) {
4214     // fold (mulhu x, 0) -> 0
4215     // do not return N0/N1, because undef node may exist.
4216     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4217         ISD::isBuildVectorAllZeros(N1.getNode()))
4218       return DAG.getConstant(0, DL, VT);
4219   }
4220 
4221   // fold (mulhu x, 0) -> 0
4222   if (isNullConstant(N1))
4223     return N1;
4224   // fold (mulhu x, 1) -> 0
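       // (The zero-extended product x * 1 fits entirely in the low half.)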
4225   if (isOneConstant(N1))
4226     return DAG.getConstant(0, DL, N0.getValueType());
4227   // fold (mulhu x, undef) -> 0
4228   if (N0.isUndef() || N1.isUndef())
4229     return DAG.getConstant(0, DL, VT);
4230 
4231   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
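       // e.g. for i32 with c = 4, (mulhu x, 16) becomes (srl x, 28).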
4232   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4233       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4234     unsigned NumEltBits = VT.getScalarSizeInBits();
4235     SDValue LogBase2 = BuildLogBase2(N1, DL);
4236     SDValue SRLAmt = DAG.getNode(
4237         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4238     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4239     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4240     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4241   }
4242 
4243   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4244   // plus a shift.
4245   if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
4246     MVT Simple = VT.getSimpleVT();
4247     unsigned SimpleSize = Simple.getSizeInBits();
4248     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4249     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4250       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4251       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4252       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4253       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4254             DAG.getConstant(SimpleSize, DL,
4255                             getShiftAmountTy(N1.getValueType())));
4256       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4257     }
4258   }
4259 
4260   return SDValue();
4261 }
4262 
4263 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4264 /// give the opcodes for the two computations that are being performed. Return
4265 /// the combined value if a simplification was made.
4266 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4267                                                 unsigned HiOp) {
4268   // If the high half is not needed, just compute the low half.
4269   bool HiExists = N->hasAnyUseOfValue(1);
4270   if (!HiExists && (!LegalOperations ||
4271                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4272     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4273     return CombineTo(N, Res, Res);
4274   }
4275 
4276   // If the low half is not needed, just compute the high half.
4277   bool LoExists = N->hasAnyUseOfValue(0);
4278   if (!LoExists && (!LegalOperations ||
4279                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4280     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4281     return CombineTo(N, Res, Res);
4282   }
4283 
4284   // If both halves are used, leave the node as it is.
4285   if (LoExists && HiExists)
4286     return SDValue();
4287 
4288   // If the two computed results can be simplified separately, separate them.
4289   if (LoExists) {
4290     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4291     AddToWorklist(Lo.getNode());
4292     SDValue LoOpt = combine(Lo.getNode());
4293     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4294         (!LegalOperations ||
4295          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4296       return CombineTo(N, LoOpt, LoOpt);
4297   }
4298 
4299   if (HiExists) {
4300     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4301     AddToWorklist(Hi.getNode());
4302     SDValue HiOpt = combine(Hi.getNode());
4303     if (HiOpt.getNode() && HiOpt != Hi &&
4304         (!LegalOperations ||
4305          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4306       return CombineTo(N, HiOpt, HiOpt);
4307   }
4308 
4309   return SDValue();
4310 }
4311 
4312 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4313   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4314     return Res;
4315 
4316   EVT VT = N->getValueType(0);
4317   SDLoc DL(N);
4318 
4319   // If the type twice as wide is legal, transform this into a wider multiply
4320   // plus a shift.
4321   if (VT.isSimple() && !VT.isVector()) {
4322     MVT Simple = VT.getSimpleVT();
4323     unsigned SimpleSize = Simple.getSizeInBits();
4324     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4325     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4326       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4327       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4328       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4329       // Compute the high part (result 1).
4330       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4331             DAG.getConstant(SimpleSize, DL,
4332                             getShiftAmountTy(Lo.getValueType())));
4333       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4334       // Compute the low part (result 0).
4335       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4336       return CombineTo(N, Lo, Hi);
4337     }
4338   }
4339 
4340   return SDValue();
4341 }
4342 
4343 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4344   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4345     return Res;
4346 
4347   EVT VT = N->getValueType(0);
4348   SDLoc DL(N);
4349 
4350   // (umul_lohi N0, 0) -> (0, 0)
4351   if (isNullConstant(N->getOperand(1))) {
4352     SDValue Zero = DAG.getConstant(0, DL, VT);
4353     return CombineTo(N, Zero, Zero);
4354   }
4355 
4356   // (umul_lohi N0, 1) -> (N0, 0)
4357   if (isOneConstant(N->getOperand(1))) {
4358     SDValue Zero = DAG.getConstant(0, DL, VT);
4359     return CombineTo(N, N->getOperand(0), Zero);
4360   }
4361 
4362   // If the type twice as wide is legal, transform this into a wider multiply
4363   // plus a shift.
4364   if (VT.isSimple() && !VT.isVector()) {
4365     MVT Simple = VT.getSimpleVT();
4366     unsigned SimpleSize = Simple.getSizeInBits();
4367     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4368     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4369       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4370       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4371       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4372       // Compute the high part (result 1).
4373       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4374             DAG.getConstant(SimpleSize, DL,
4375                             getShiftAmountTy(Lo.getValueType())));
4376       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4377       // Compute the low part (result 0).
4378       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4379       return CombineTo(N, Lo, Hi);
4380     }
4381   }
4382 
4383   return SDValue();
4384 }
4385 
4386 SDValue DAGCombiner::visitMULO(SDNode *N) {
4387   SDValue N0 = N->getOperand(0);
4388   SDValue N1 = N->getOperand(1);
4389   EVT VT = N0.getValueType();
4390   bool IsSigned = (ISD::SMULO == N->getOpcode());
4391 
4392   EVT CarryVT = N->getValueType(1);
4393   SDLoc DL(N);
4394 
4395   // canonicalize constant to RHS.
4396   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4397       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4398     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4399 
4400   // fold (mulo x, 0) -> 0 + no carry out
4401   if (isNullOrNullSplat(N1))
4402     return CombineTo(N, DAG.getConstant(0, DL, VT),
4403                      DAG.getConstant(0, DL, CarryVT));
4404 
4405   // (mulo x, 2) -> (addo x, x)
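       // (x * 2 and x + x overflow under exactly the same conditions.)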
4406   if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4407     if (C2->getAPIntValue() == 2)
4408       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4409                          N->getVTList(), N0, N0);
4410 
4411   return SDValue();
4412 }
4413 
4414 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4415   SDValue N0 = N->getOperand(0);
4416   SDValue N1 = N->getOperand(1);
4417   EVT VT = N0.getValueType();
4418   unsigned Opcode = N->getOpcode();
4419 
4420   // fold vector ops
4421   if (VT.isVector())
4422     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4423       return FoldedVOp;
4424 
4425   // fold operation with constant operands.
4426   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4427     return C;
4428 
4429   // canonicalize constant to RHS
4430   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4431       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4432     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4433 
4434   // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4435   // Only do this if the current op isn't legal and the flipped is.
4436   if (!TLI.isOperationLegal(Opcode, VT) &&
4437       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4438       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4439     unsigned AltOpcode;
4440     switch (Opcode) {
4441     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4442     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4443     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4444     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4445     default: llvm_unreachable("Unknown MINMAX opcode");
4446     }
4447     if (TLI.isOperationLegal(AltOpcode, VT))
4448       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4449   }
4450 
4451   return SDValue();
4452 }
4453 
4454 /// If this is a bitwise logic instruction and both operands have the same
4455 /// opcode, try to sink the other opcode after the logic instruction.
4456 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4457   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4458   EVT VT = N0.getValueType();
4459   unsigned LogicOpcode = N->getOpcode();
4460   unsigned HandOpcode = N0.getOpcode();
4461   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4462           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4463   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4464 
4465   // Bail early if none of these transforms apply.
4466   if (N0.getNumOperands() == 0)
4467     return SDValue();
4468 
4469   // FIXME: We should check number of uses of the operands to not increase
4470   //        the instruction count for all transforms.
4471 
4472   // Handle size-changing casts.
4473   SDValue X = N0.getOperand(0);
4474   SDValue Y = N1.getOperand(0);
4475   EVT XVT = X.getValueType();
4476   SDLoc DL(N);
4477   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4478       HandOpcode == ISD::SIGN_EXTEND) {
4479     // If both operands have other uses, this transform would create extra
4480     // instructions without eliminating anything.
4481     if (!N0.hasOneUse() && !N1.hasOneUse())
4482       return SDValue();
4483     // We need matching integer source types.
4484     if (XVT != Y.getValueType())
4485       return SDValue();
4486     // Don't create an illegal op during or after legalization. Don't ever
4487     // create an unsupported vector op.
4488     if ((VT.isVector() || LegalOperations) &&
4489         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4490       return SDValue();
4491     // Avoid infinite looping with PromoteIntBinOp.
4492     // TODO: Should we apply desirable/legal constraints to all opcodes?
4493     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4494         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4495       return SDValue();
4496     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
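         // e.g. (and (zext x), (zext y)) --> (zext (and x, y)).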
4497     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4498     return DAG.getNode(HandOpcode, DL, VT, Logic);
4499   }
4500 
4501   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4502   if (HandOpcode == ISD::TRUNCATE) {
4503     // If both operands have other uses, this transform would create extra
4504     // instructions without eliminating anything.
4505     if (!N0.hasOneUse() && !N1.hasOneUse())
4506       return SDValue();
4507     // We need matching source types.
4508     if (XVT != Y.getValueType())
4509       return SDValue();
4510     // Don't create an illegal op during or after legalization.
4511     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4512       return SDValue();
4513     // Be extra careful sinking truncate. If it's free, there's no benefit in
4514     // widening a binop. Also, don't create a logic op on an illegal type.
4515     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4516       return SDValue();
4517     if (!TLI.isTypeLegal(XVT))
4518       return SDValue();
4519     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4520     return DAG.getNode(HandOpcode, DL, VT, Logic);
4521   }
4522 
4523   // For binops SHL/SRL/SRA/AND:
4524   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
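       // e.g. (or (shl x, z), (shl y, z)) --> (shl (or x, y), z).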
4525   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4526        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4527       N0.getOperand(1) == N1.getOperand(1)) {
4528     // If either operand has other uses, this transform is not an improvement.
4529     if (!N0.hasOneUse() || !N1.hasOneUse())
4530       return SDValue();
4531     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4532     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4533   }
4534 
4535   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4536   if (HandOpcode == ISD::BSWAP) {
4537     // If either operand has other uses, this transform is not an improvement.
4538     if (!N0.hasOneUse() || !N1.hasOneUse())
4539       return SDValue();
4540     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4541     return DAG.getNode(HandOpcode, DL, VT, Logic);
4542   }
4543 
4544   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4545   // Only perform this optimization up until type legalization, before
4546   // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4547   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4548   // we don't want to undo this promotion.
4549   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4550   // on scalars.
4551   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4552        Level <= AfterLegalizeTypes) {
4553     // Input types must be integer and the same.
4554     if (XVT.isInteger() && XVT == Y.getValueType() &&
4555         !(VT.isVector() && TLI.isTypeLegal(VT) &&
4556           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4557       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4558       return DAG.getNode(HandOpcode, DL, VT, Logic);
4559     }
4560   }
4561 
4562   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4563   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4564   // If both shuffles use the same mask, and both shuffle within a single
4565   // vector, then it is worthwhile to move the swizzle after the operation.
4566   // The type-legalizer generates this pattern when loading illegal
4567   // vector types from memory. In many cases this allows additional shuffle
4568   // optimizations.
4569   // There are other cases where moving the shuffle after the xor/and/or
4570   // is profitable even if shuffles don't perform a swizzle.
4571   // If both shuffles use the same mask, and both shuffles have the same first
4572   // or second operand, then it might still be profitable to move the shuffle
4573   // after the xor/and/or operation.
4574   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4575     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4576     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4577     assert(X.getValueType() == Y.getValueType() &&
4578            "Inputs to shuffles are not the same type");
4579 
4580     // Check that both shuffles use the same mask. The masks are known to be of
4581     // the same length because the result vector type is the same.
4582     // Check also that shuffles have only one use to avoid introducing extra
4583     // instructions.
4584     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4585         !SVN0->getMask().equals(SVN1->getMask()))
4586       return SDValue();
4587 
4588     // Don't try to fold this node if it requires introducing a
4589     // build vector of all zeros that might be illegal at this stage.
4590     SDValue ShOp = N0.getOperand(1);
4591     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4592       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4593 
4594     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4595     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4596       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4597                                   N0.getOperand(0), N1.getOperand(0));
4598       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4599     }
4600 
4601     // Don't try to fold this node if it requires introducing a
4602     // build vector of all zeros that might be illegal at this stage.
4603     ShOp = N0.getOperand(0);
4604     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4605       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4606 
4607     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4608     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4609       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4610                                   N1.getOperand(1));
4611       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4612     }
4613   }
4614 
4615   return SDValue();
4616 }
4617 
4618 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4619 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4620                                        const SDLoc &DL) {
4621   SDValue LL, LR, RL, RR, N0CC, N1CC;
4622   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4623       !isSetCCEquivalent(N1, RL, RR, N1CC))
4624     return SDValue();
4625 
4626   assert(N0.getValueType() == N1.getValueType() &&
4627          "Unexpected operand types for bitwise logic op");
4628   assert(LL.getValueType() == LR.getValueType() &&
4629          RL.getValueType() == RR.getValueType() &&
4630          "Unexpected operand types for setcc");
4631 
4632   // If we're here post-legalization or the logic op type is not i1, the logic
4633   // op type must match a setcc result type. Also, all folds require new
4634   // operations on the left and right operands, so those types must match.
4635   EVT VT = N0.getValueType();
4636   EVT OpVT = LL.getValueType();
4637   if (LegalOperations || VT.getScalarType() != MVT::i1)
4638     if (VT != getSetCCResultType(OpVT))
4639       return SDValue();
4640   if (OpVT != RL.getValueType())
4641     return SDValue();
4642 
4643   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4644   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4645   bool IsInteger = OpVT.isInteger();
4646   if (LR == RR && CC0 == CC1 && IsInteger) {
4647     bool IsZero = isNullOrNullSplat(LR);
4648     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4649 
4650     // All bits clear?
4651     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4652     // All sign bits clear?
4653     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4654     // Any bits set?
4655     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4656     // Any sign bits set?
4657     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4658 
4659     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4660     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4661     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4662     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4663     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4664       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4665       AddToWorklist(Or.getNode());
4666       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4667     }
4668 
4669     // All bits set?
4670     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4671     // All sign bits set?
4672     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4673     // Any bits clear?
4674     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4675     // Any sign bits clear?
4676     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4677 
4678     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4679     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4680     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4681     // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4682     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4683       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4684       AddToWorklist(And.getNode());
4685       return DAG.getSetCC(DL, VT, And, LR, CC1);
4686     }
4687   }
4688 
4689   // TODO: What is the 'or' equivalent of this fold?
4690   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
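       // (Adding 1 maps X == 0 to 1 and X == -1 to 0; those are the only
       // unsigned values below 2.)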
4691   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4692       IsInteger && CC0 == ISD::SETNE &&
4693       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4694        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4695     SDValue One = DAG.getConstant(1, DL, OpVT);
4696     SDValue Two = DAG.getConstant(2, DL, OpVT);
4697     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4698     AddToWorklist(Add.getNode());
4699     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4700   }
4701 
4702   // Try more general transforms if the predicates match and the only user of
4703   // the compares is the 'and' or 'or'.
4704   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4705       N0.hasOneUse() && N1.hasOneUse()) {
4706     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4707     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4708     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4709       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4710       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4711       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4712       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4713       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4714     }
4715 
4716     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4717     // TODO - support non-uniform vector amounts.
4718     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4719       // Match a shared variable operand and 2 non-opaque constant operands.
4720       ConstantSDNode *C0 = isConstOrConstSplat(LR);
4721       ConstantSDNode *C1 = isConstOrConstSplat(RR);
4722       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4723         // Canonicalize larger constant as C0.
4724         if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4725           std::swap(C0, C1);
4726 
4727         // The difference of the constants must be a single bit.
4728         const APInt &C0Val = C0->getAPIntValue();
4729         const APInt &C1Val = C1->getAPIntValue();
4730         if ((C0Val - C1Val).isPowerOf2()) {
4731           // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4732           // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
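               // e.g. with C0 = 12 and C1 = 8: ((add X, -8) & ~4) == 0 holds
               // exactly when X is 8 or 12.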
4733           SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4734           SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4735           SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4736           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4737           SDValue Zero = DAG.getConstant(0, DL, OpVT);
4738           return DAG.getSetCC(DL, VT, And, Zero, CC0);
4739         }
4740       }
4741     }
4742   }
4743 
4744   // Canonicalize equivalent operands to LL == RL.
4745   if (LL == RR && LR == RL) {
4746     CC1 = ISD::getSetCCSwappedOperands(CC1);
4747     std::swap(RL, RR);
4748   }
4749 
4750   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4751   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4752   if (LL == RL && LR == RR) {
4753     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
4754                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
4755     if (NewCC != ISD::SETCC_INVALID &&
4756         (!LegalOperations ||
4757          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4758           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4759       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4760   }
4761 
4762   return SDValue();
4763 }
4764 
4765 /// This contains all DAGCombine rules which reduce two values combined by
4766 /// an And operation to a single value. This makes them reusable in the context
4767 /// of visitSELECT(). Rules involving constants are not included as
4768 /// visitSELECT() already handles those cases.
4769 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4770   EVT VT = N1.getValueType();
4771   SDLoc DL(N);
4772 
4773   // fold (and x, undef) -> 0
4774   if (N0.isUndef() || N1.isUndef())
4775     return DAG.getConstant(0, DL, VT);
4776 
4777   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4778     return V;
4779 
4780   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4781       VT.getSizeInBits() <= 64) {
4782     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4783       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4784         // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
4785         // immediate for an add but becomes legal when its top c2 bits are set,
4786         // transform the ADD so the immediate doesn't need to be materialized
4787         // in a register.
4788         APInt ADDC = ADDI->getAPIntValue();
4789         APInt SRLC = SRLI->getAPIntValue();
4790         if (ADDC.getMinSignedBits() <= 64 &&
4791             SRLC.ult(VT.getSizeInBits()) &&
4792             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4793           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4794                                              SRLC.getZExtValue());
4795           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4796             ADDC |= Mask;
4797             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4798               SDLoc DL0(N0);
4799               SDValue NewAdd =
4800                 DAG.getNode(ISD::ADD, DL0, VT,
4801                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4802               CombineTo(N0.getNode(), NewAdd);
4803               // Return N so it doesn't get rechecked!
4804               return SDValue(N, 0);
4805             }
4806           }
4807         }
4808       }
4809     }
4810   }
4811 
4812   // Reduce bit extract of low half of an integer to the narrower type.
4813   // (and (srl i64:x, K), KMask) ->
4814   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
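       // e.g. extracting a small bit field that lies entirely within the low
       // 32 bits of an i64 value can be done with i32 operations.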
4815   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4816     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4817       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4818         unsigned Size = VT.getSizeInBits();
4819         const APInt &AndMask = CAnd->getAPIntValue();
4820         unsigned ShiftBits = CShift->getZExtValue();
4821 
4822         // Bail out, this node will probably disappear anyway.
4823         if (ShiftBits == 0)
4824           return SDValue();
4825 
4826         unsigned MaskBits = AndMask.countTrailingOnes();
4827         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4828 
4829         if (AndMask.isMask() &&
4830             // Required bits must not span the two halves of the integer and
4831             // must fit in the half size type.
4832             (ShiftBits + MaskBits <= Size / 2) &&
4833             TLI.isNarrowingProfitable(VT, HalfVT) &&
4834             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4835             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4836             TLI.isTruncateFree(VT, HalfVT) &&
4837             TLI.isZExtFree(HalfVT, VT)) {
4838           // The isNarrowingProfitable check is to avoid regressions on PPC and
4839           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4840           // on downstream users of this. Those patterns could probably be
4841           // extended to handle extensions mixed in.
4842 
4843           SDLoc SL(N0);
4844           assert(MaskBits <= Size);
4845 
4846           // Extracting the highest bit of the low half.
4847           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4848           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4849                                       N0.getOperand(0));
4850 
4851           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4852           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4853           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4854           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4855           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4856         }
4857       }
4858     }
4859   }
4860 
4861   return SDValue();
4862 }
4863 
4864 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4865                                    EVT LoadResultTy, EVT &ExtVT) {
4866   if (!AndC->getAPIntValue().isMask())
4867     return false;
4868 
4869   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4870 
4871   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4872   EVT LoadedVT = LoadN->getMemoryVT();
4873 
4874   if (ExtVT == LoadedVT &&
4875       (!LegalOperations ||
4876        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4877     // ZEXTLOAD will match without needing to change the size of the value being
4878     // loaded.
4879     return true;
4880   }
4881 
4882   // Do not change the width of volatile or atomic loads.
4883   if (!LoadN->isSimple())
4884     return false;
4885 
4886   // Do not generate loads of non-round integer types since these can
4887   // be expensive (and would be wrong if the type is not byte sized).
4888   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4889     return false;
4890 
4891   if (LegalOperations &&
4892       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4893     return false;
4894 
4895   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4896     return false;
4897 
4898   return true;
4899 }
4900 
4901 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4902                                     ISD::LoadExtType ExtType, EVT &MemVT,
4903                                     unsigned ShAmt) {
4904   if (!LDST)
4905     return false;
4906   // Only allow byte offsets.
4907   if (ShAmt % 8)
4908     return false;
4909 
4910   // Do not generate loads of non-round integer types since these can
4911   // be expensive (and would be wrong if the type is not byte sized).
4912   if (!MemVT.isRound())
4913     return false;
4914 
4915   // Don't change the width of volatile or atomic loads.
4916   if (!LDST->isSimple())
4917     return false;
4918 
4919   // Verify that we are actually reducing a load width here.
4920   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4921     return false;
4922 
4923   // Ensure that this isn't going to produce an unsupported memory access.
4924   if (ShAmt) {
4925     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
4926     const unsigned ByteShAmt = ShAmt / 8;
4927     const Align LDSTAlign = LDST->getAlign();
4928     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
4929     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4930                                 LDST->getAddressSpace(), NarrowAlign,
4931                                 LDST->getMemOperand()->getFlags()))
4932       return false;
4933   }
4934 
4935   // It's not possible to generate a constant of extended or untyped type.
4936   EVT PtrType = LDST->getBasePtr().getValueType();
4937   if (PtrType == MVT::Untyped || PtrType.isExtended())
4938     return false;
4939 
4940   if (isa<LoadSDNode>(LDST)) {
4941     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4942     // Don't transform one with multiple uses; this would require adding a new
4943     // load.
4944     if (!SDValue(Load, 0).hasOneUse())
4945       return false;
4946 
4947     if (LegalOperations &&
4948         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4949       return false;
4950 
4951     // For the transform to be legal, the load must produce only two values
4952     // (the value loaded and the chain).  Don't transform a pre-increment
4953     // load, for example, which produces an extra value.  Otherwise the
4954     // transformation is not equivalent, and the downstream logic to replace
4955     // uses gets things wrong.
4956     if (Load->getNumValues() > 2)
4957       return false;
4958 
4959     // If the load that we're shrinking is an extload and we're not just
4960     // discarding the extension we can't simply shrink the load. Bail.
4961     // TODO: It would be possible to merge the extensions in some cases.
4962     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4963         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4964       return false;
4965 
4966     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4967       return false;
4968   } else {
4969     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4970     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4971     // Can't write outside the original store
4972     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4973       return false;
4974 
4975     if (LegalOperations &&
4976         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4977       return false;
4978   }
4979   return true;
4980 }
4981 
4982 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4983                                     SmallVectorImpl<LoadSDNode*> &Loads,
4984                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4985                                     ConstantSDNode *Mask,
4986                                     SDNode *&NodeToMask) {
4987   // Recursively search for the operands, looking for loads which can be
4988   // narrowed.
4989   for (SDValue Op : N->op_values()) {
4990     if (Op.getValueType().isVector())
4991       return false;
4992 
4993     // Some constants may need fixing up later if they are too large.
4994     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4995       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4996           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4997         NodesWithConsts.insert(N);
4998       continue;
4999     }
5000 
5001     if (!Op.hasOneUse())
5002       return false;
5003 
5004     switch(Op.getOpcode()) {
5005     case ISD::LOAD: {
5006       auto *Load = cast<LoadSDNode>(Op);
5007       EVT ExtVT;
5008       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5009           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5010 
5011         // ZEXTLOAD is already small enough.
5012         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5013             ExtVT.bitsGE(Load->getMemoryVT()))
5014           continue;
5015 
5016         // Use LE to convert equal sized loads to zext.
5017         if (ExtVT.bitsLE(Load->getMemoryVT()))
5018           Loads.push_back(Load);
5019 
5020         continue;
5021       }
5022       return false;
5023     }
5024     case ISD::ZERO_EXTEND:
5025     case ISD::AssertZext: {
5026       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5027       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5028       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5029         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5030         Op.getOperand(0).getValueType();
5031 
5032       // We can accept extending nodes if the mask is wider than or equal in
5033       // width to the original type.
5034       if (ExtVT.bitsGE(VT))
5035         continue;
5036       break;
5037     }
5038     case ISD::OR:
5039     case ISD::XOR:
5040     case ISD::AND:
5041       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5042                              NodeToMask))
5043         return false;
5044       continue;
5045     }
5046 
5047     // Allow one node which will be masked along with any loads found.
5048     if (NodeToMask)
5049       return false;
5050 
5051     // Also ensure that the node to be masked only produces one data result.
5052     NodeToMask = Op.getNode();
5053     if (NodeToMask->getNumValues() > 1) {
5054       bool HasValue = false;
5055       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5056         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5057         if (VT != MVT::Glue && VT != MVT::Other) {
5058           if (HasValue) {
5059             NodeToMask = nullptr;
5060             return false;
5061           }
5062           HasValue = true;
5063         }
5064       }
5065       assert(HasValue && "Node to be masked has no data result?");
5066     }
5067   }
5068   return true;
5069 }
5070 
5071 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5072   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5073   if (!Mask)
5074     return false;
5075 
5076   if (!Mask->getAPIntValue().isMask())
5077     return false;
5078 
5079   // No need to do anything if the and directly uses a load.
5080   if (isa<LoadSDNode>(N->getOperand(0)))
5081     return false;
5082 
5083   SmallVector<LoadSDNode*, 8> Loads;
5084   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5085   SDNode *FixupNode = nullptr;
5086   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5087     if (Loads.size() == 0)
5088       return false;
5089 
5090     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5091     SDValue MaskOp = N->getOperand(1);
5092 
5093     // If it exists, fixup the single node we allow in the tree that needs
5094     // masking.
5095     if (FixupNode) {
5096       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5097       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5098                                 FixupNode->getValueType(0),
5099                                 SDValue(FixupNode, 0), MaskOp);
5100       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5101       if (And.getOpcode() == ISD::AND)
5102         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5103     }
5104 
5105     // Narrow any constants that need it.
5106     for (auto *LogicN : NodesWithConsts) {
5107       SDValue Op0 = LogicN->getOperand(0);
5108       SDValue Op1 = LogicN->getOperand(1);
5109 
5110       if (isa<ConstantSDNode>(Op0))
5111           std::swap(Op0, Op1);
5112 
5113       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5114                                 Op1, MaskOp);
5115 
5116       DAG.UpdateNodeOperands(LogicN, Op0, And);
5117     }
5118 
5119     // Create narrow loads.
5120     for (auto *Load : Loads) {
5121       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5122       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5123                                 SDValue(Load, 0), MaskOp);
5124       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5125       if (And.getOpcode() == ISD::AND)
5126         And = SDValue(
5127             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5128       SDValue NewLoad = ReduceLoadWidth(And.getNode());
5129       assert(NewLoad &&
5130              "Shouldn't be masking the load if it can't be narrowed");
5131       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5132     }
5133     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5134     return true;
5135   }
5136   return false;
5137 }
5138 
5139 // Unfold
5140 //    x &  (-1 'logical shift' y)
5141 // To
5142 //    (x 'opposite logical shift' y) 'logical shift' y
5143 // if it is better for performance.
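     // e.g. x & (-1 << y) becomes ((x >>u y) << y), which clears the low y
     // bits without materializing the mask.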
5144 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5145   assert(N->getOpcode() == ISD::AND);
5146 
5147   SDValue N0 = N->getOperand(0);
5148   SDValue N1 = N->getOperand(1);
5149 
5150   // Do we actually prefer shifts over mask?
5151   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5152     return SDValue();
5153 
5154   // Try to match  (-1 '[outer] logical shift' y)
5155   unsigned OuterShift;
5156   unsigned InnerShift; // The opposite direction to the OuterShift.
5157   SDValue Y;           // Shift amount.
5158   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5159     if (!M.hasOneUse())
5160       return false;
5161     OuterShift = M->getOpcode();
5162     if (OuterShift == ISD::SHL)
5163       InnerShift = ISD::SRL;
5164     else if (OuterShift == ISD::SRL)
5165       InnerShift = ISD::SHL;
5166     else
5167       return false;
5168     if (!isAllOnesConstant(M->getOperand(0)))
5169       return false;
5170     Y = M->getOperand(1);
5171     return true;
5172   };
5173 
5174   SDValue X;
5175   if (matchMask(N1))
5176     X = N0;
5177   else if (matchMask(N0))
5178     X = N1;
5179   else
5180     return SDValue();
5181 
5182   SDLoc DL(N);
5183   EVT VT = N->getValueType(0);
5184 
5185   //     tmp = x   'opposite logical shift' y
5186   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5187   //     ret = tmp 'logical shift' y
5188   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5189 
5190   return T1;
5191 }
5192 
5193 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5194 /// For a target with a bit test, this is expected to become test + set and save
5195 /// at least 1 instruction.
5196 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5197   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5198 
5199   // This is probably not worthwhile without a supported type.
5200   EVT VT = And->getValueType(0);
5201   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5202   if (!TLI.isTypeLegal(VT))
5203     return SDValue();
5204 
5205   // Look through an optional extension and find a 'not'.
5206   // TODO: Should we favor test+set even without the 'not' op?
5207   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5208   if (Not.getOpcode() == ISD::ANY_EXTEND)
5209     Not = Not.getOperand(0);
5210   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5211     return SDValue();
5212 
5213   // Look through an optional truncation. The source operand may not be the same
5214   // type as the original 'and', but that is ok because we are masking off
5215   // everything but the low bit.
5216   SDValue Srl = Not.getOperand(0);
5217   if (Srl.getOpcode() == ISD::TRUNCATE)
5218     Srl = Srl.getOperand(0);
5219 
5220   // Match a shift-right by constant.
5221   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5222       !isa<ConstantSDNode>(Srl.getOperand(1)))
5223     return SDValue();
5224 
5225   // We might have looked through casts that make this transform invalid.
5226   // TODO: If the source type is wider than the result type, do the mask and
5227   //       compare in the source type.
5228   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5229   unsigned VTBitWidth = VT.getSizeInBits();
5230   if (ShiftAmt.uge(VTBitWidth))
5231     return SDValue();
5232 
5233   // Turn this into a bit-test pattern using mask op + setcc:
5234   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
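       // i.e. produce 1 when bit C of X is clear and 0 when it is set.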
5235   SDLoc DL(And);
5236   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5237   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5238   SDValue Mask = DAG.getConstant(
5239       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5240   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5241   SDValue Zero = DAG.getConstant(0, DL, VT);
5242   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5243   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5244 }
5245 
5246 SDValue DAGCombiner::visitAND(SDNode *N) {
5247   SDValue N0 = N->getOperand(0);
5248   SDValue N1 = N->getOperand(1);
5249   EVT VT = N1.getValueType();
5250 
5251   // x & x --> x
5252   if (N0 == N1)
5253     return N0;
5254 
5255   // fold vector ops
5256   if (VT.isVector()) {
5257     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5258       return FoldedVOp;
5259 
5260     // fold (and x, 0) -> 0, vector edition
5261     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5262       // do not return N0, because undef node may exist in N0
5263       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5264                              SDLoc(N), N0.getValueType());
5265     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5266       // do not return N1, because undef node may exist in N1
5267       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5268                              SDLoc(N), N1.getValueType());
5269 
5270     // fold (and x, -1) -> x, vector edition
5271     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5272       return N1;
5273     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5274       return N0;
5275   }
5276 
5277   // fold (and c1, c2) -> c1&c2
5278   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5279   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5280     return C;
5281 
5282   // canonicalize constant to RHS
5283   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5284       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5285     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5286 
5287   // fold (and x, -1) -> x
5288   if (isAllOnesConstant(N1))
5289     return N0;
5290 
5291   // if (and x, c) is known to be zero, return 0
5292   unsigned BitWidth = VT.getScalarSizeInBits();
5293   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5294                                    APInt::getAllOnesValue(BitWidth)))
5295     return DAG.getConstant(0, SDLoc(N), VT);
5296 
5297   if (SDValue NewSel = foldBinOpIntoSelect(N))
5298     return NewSel;
5299 
5300   // reassociate and
5301   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5302     return RAND;
5303 
5304   // Try to convert a constant mask AND into a shuffle clear mask.
5305   if (VT.isVector())
5306     if (SDValue Shuffle = XformToShuffleWithZero(N))
5307       return Shuffle;
5308 
5309   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5310     return Combined;
5311 
5312   // fold (and (or x, C), D) -> D if (C & D) == D
5313   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5314     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5315   };
5316   if (N0.getOpcode() == ISD::OR &&
5317       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5318     return N1;
5319   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5320   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5321     SDValue N0Op0 = N0.getOperand(0);
5322     APInt Mask = ~N1C->getAPIntValue();
5323     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5324     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5325       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5326                                  N0.getValueType(), N0Op0);
5327 
5328       // Replace uses of the AND with uses of the Zero extend node.
5329       CombineTo(N, Zext);
5330 
5331       // We actually want to replace all uses of the any_extend with the
5332       // zero_extend, to avoid duplicating things.  This will later cause this
5333       // AND to be folded.
5334       CombineTo(N0.getNode(), Zext);
5335       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5336     }
5337   }
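       // For instance, (and (any_ext x), 0xFFFF) with x of type i16 always
       // qualifies: ~0xFFFF truncates to 0 in the source width, so the AND only
       // clears the extended bits and the pair becomes (zero_ext x).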
5338 
5339   // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5340   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5341   // already be zero by virtue of the width of the base type of the load.
5342   //
5343   // The 'X' node here can either be nothing or an extract_vector_elt to catch
5344   // more cases.
5345   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5346        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5347        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5348        N0.getOperand(0).getResNo() == 0) ||
5349       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5350     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5351                                          N0 : N0.getOperand(0) );
5352 
5353     // Get the constant (if applicable) that the zeroth operand is being
5354     // ANDed with. This can be a pure constant or a vector splat, in which
5355     // case we treat the vector as a scalar and use the splat value.
5356     APInt Constant = APInt::getNullValue(1);
5357     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5358       Constant = C->getAPIntValue();
5359     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5360       APInt SplatValue, SplatUndef;
5361       unsigned SplatBitSize;
5362       bool HasAnyUndefs;
5363       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5364                                              SplatBitSize, HasAnyUndefs);
5365       if (IsSplat) {
5366         // Undef bits can contribute to a possible optimisation if set, so
5367         // set them.
5368         SplatValue |= SplatUndef;
5369 
5370         // The splat value may be something like "0x00FFFFFF", which means 0 for
5371         // the first vector value and FF for the rest, repeating. We need a mask
5372         // that will apply equally to all members of the vector, so AND all the
5373         // lanes of the constant together.
5374         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5375 
5376         // If the splat value has been compressed to a bitlength lower
5377         // than the size of the vector lane, we need to re-expand it to
5378         // the lane size.
5379         if (EltBitWidth > SplatBitSize)
5380           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5381                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5382             SplatValue |= SplatValue.shl(SplatBitSize);
5383 
5384         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5385         // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5386         if ((SplatBitSize % EltBitWidth) == 0) {
5387           Constant = APInt::getAllOnesValue(EltBitWidth);
5388           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5389             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5390         }
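             // Worked example: a splat byte 0xAB in an i16-element vector
             // re-expands to 0xABAB (EltBitWidth 16 > SplatBitSize 8). For
             // lanes alternating <0x00FF, 0xFFFF> the repeating pattern is 32
             // bits wide; ANDing its two 16-bit halves gives Constant ==
             // 0x00FF, the bits that are set in every lane.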
5391       }
5392     }
5393 
5394     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5395     // actually legal and isn't going to get expanded, else this is a false
5396     // optimisation.
5397     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5398                                                     Load->getValueType(0),
5399                                                     Load->getMemoryVT());
5400 
5401     // Resize the constant to the same size as the original memory access before
5402     // extension. If it is still the AllOnesValue then this AND is completely
5403     // unneeded.
5404     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
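         // E.g. for a zextload of i8 used at i32 with N1 == 0xFF, truncating
         // the constant to the 8-bit memory type gives all ones, so the AND
         // is redundant and is folded away below.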
5405 
5406     bool B;
5407     switch (Load->getExtensionType()) {
5408     default: B = false; break;
5409     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5410     case ISD::ZEXTLOAD:
5411     case ISD::NON_EXTLOAD: B = true; break;
5412     }
5413 
5414     if (B && Constant.isAllOnesValue()) {
5415       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5416       // preserve semantics once we get rid of the AND.
5417       SDValue NewLoad(Load, 0);
5418 
5419       // Fold the AND away. NewLoad may get replaced immediately.
5420       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5421 
5422       if (Load->getExtensionType() == ISD::EXTLOAD) {
5423         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5424                               Load->getValueType(0), SDLoc(Load),
5425                               Load->getChain(), Load->getBasePtr(),
5426                               Load->getOffset(), Load->getMemoryVT(),
5427                               Load->getMemOperand());
5428         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5429         if (Load->getNumValues() == 3) {
5430           // PRE/POST_INC loads have 3 values.
5431           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5432                            NewLoad.getValue(2) };
5433           CombineTo(Load, To, 3, true);
5434         } else {
5435           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5436         }
5437       }
5438 
5439       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5440     }
5441   }
5442 
5443   // fold (and (load x), 255) -> (zextload x, i8)
5444   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5445   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5446   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5447                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5448                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5449     if (SDValue Res = ReduceLoadWidth(N)) {
5450       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5451         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5452       AddToWorklist(N);
5453       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5454       return SDValue(N, 0);
5455     }
5456   }
5457 
5458   if (LegalTypes) {
5459     // Attempt to propagate the AND back up to the leaves which, if they're
5460     // loads, can be combined to narrow loads and the AND node can be removed.
5461     // Perform after legalization so that extend nodes will already be
5462     // combined into the loads.
5463     if (BackwardsPropagateMask(N))
5464       return SDValue(N, 0);
5465   }
5466 
5467   if (SDValue Combined = visitANDLike(N0, N1, N))
5468     return Combined;
5469 
5470   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5471   if (N0.getOpcode() == N1.getOpcode())
5472     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5473       return V;
5474 
5475   // Masking the negated extension of a boolean is just the zero-extended
5476   // boolean:
5477   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5478   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5479   //
5480   // Note: the SimplifyDemandedBits fold below can make an information-losing
5481   // transform, and then we have no way to find this better fold.
5482   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5483     if (isNullOrNullSplat(N0.getOperand(0))) {
5484       SDValue SubRHS = N0.getOperand(1);
5485       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5486           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5487         return SubRHS;
5488       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5489           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5490         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5491     }
5492   }
5493 
5494   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5495   // fold (and (sra)) -> (and (srl)) when possible.
5496   if (SimplifyDemandedBits(SDValue(N, 0)))
5497     return SDValue(N, 0);
5498 
5499   // fold (zext_inreg (extload x)) -> (zextload x)
5500   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5501   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5502       (ISD::isEXTLoad(N0.getNode()) ||
5503        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5504     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5505     EVT MemVT = LN0->getMemoryVT();
5506     // If we zero all the possible extended bits, then we can turn this into
5507     // a zextload if we are running before legalize or the operation is legal.
5508     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5509     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5510     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
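         // E.g. for an i8 extload used at i32, ExtBits == 0xFFFFFF00; an AND
         // mask such as 0xFF has no bits set there, so the pair folds to a
         // zextload.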
5511     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5512         ((!LegalOperations && LN0->isSimple()) ||
5513          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5514       SDValue ExtLoad =
5515           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5516                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5517       AddToWorklist(N);
5518       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5519       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5520     }
5521   }
5522 
5523   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5524   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5525     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5526                                            N0.getOperand(1), false))
5527       return BSwap;
5528   }
5529 
5530   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5531     return Shifts;
5532 
5533   if (TLI.hasBitTest(N0, N1))
5534     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5535       return V;
5536 
5537   return SDValue();
5538 }
5539 
5540 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5541 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5542                                         bool DemandHighBits) {
5543   if (!LegalOperations)
5544     return SDValue();
5545 
5546   EVT VT = N->getValueType(0);
5547   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5548     return SDValue();
5549   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5550     return SDValue();
5551 
5552   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5553   bool LookPassAnd0 = false;
5554   bool LookPassAnd1 = false;
5555   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5556     std::swap(N0, N1);
5557   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5558     std::swap(N0, N1);
5559   if (N0.getOpcode() == ISD::AND) {
5560     if (!N0.getNode()->hasOneUse())
5561       return SDValue();
5562     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5563     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5564     // This is needed for X86.
5565     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5566                   N01C->getZExtValue() != 0xFFFF))
5567       return SDValue();
5568     N0 = N0.getOperand(0);
5569     LookPassAnd0 = true;
5570   }
5571 
5572   if (N1.getOpcode() == ISD::AND) {
5573     if (!N1.getNode()->hasOneUse())
5574       return SDValue();
5575     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5576     if (!N11C || N11C->getZExtValue() != 0xFF)
5577       return SDValue();
5578     N1 = N1.getOperand(0);
5579     LookPassAnd1 = true;
5580   }
5581 
5582   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5583     std::swap(N0, N1);
5584   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5585     return SDValue();
5586   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5587     return SDValue();
5588 
5589   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5590   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5591   if (!N01C || !N11C)
5592     return SDValue();
5593   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5594     return SDValue();
5595 
5596   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5597   SDValue N00 = N0->getOperand(0);
5598   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5599     if (!N00.getNode()->hasOneUse())
5600       return SDValue();
5601     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5602     if (!N001C || N001C->getZExtValue() != 0xFF)
5603       return SDValue();
5604     N00 = N00.getOperand(0);
5605     LookPassAnd0 = true;
5606   }
5607 
5608   SDValue N10 = N1->getOperand(0);
5609   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5610     if (!N10.getNode()->hasOneUse())
5611       return SDValue();
5612     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5613     // Also allow 0xFFFF since the bits will be shifted out. This is needed
5614     // for X86.
5615     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5616                    N101C->getZExtValue() != 0xFFFF))
5617       return SDValue();
5618     N10 = N10.getOperand(0);
5619     LookPassAnd1 = true;
5620   }
5621 
5622   if (N00 != N10)
5623     return SDValue();
5624 
5625   // Make sure everything beyond the low halfword gets set to zero since the SRL
5626   // 16 will clear the top bits.
5627   unsigned OpSizeInBits = VT.getSizeInBits();
5628   if (DemandHighBits && OpSizeInBits > 16) {
5629     // If the left-shift isn't masked out then the only way this is a bswap is
5630     // if all bits beyond the low 8 are 0. In that case the entire pattern
5631     // reduces to a left shift anyway: leave it for other parts of the combiner.
5632     if (!LookPassAnd0)
5633       return SDValue();
5634 
5635     // However, if the right shift isn't masked out then it might be because
5636     // it's not needed. See if we can spot that too.
5637     if (!LookPassAnd1 &&
5638         !DAG.MaskedValueIsZero(
5639             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5640       return SDValue();
5641   }
5642 
5643   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5644   if (OpSizeInBits > 16) {
5645     SDLoc DL(N);
5646     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5647                       DAG.getConstant(OpSizeInBits - 16, DL,
5648                                       getShiftAmountTy(VT)));
5649   }
5650   return Res;
5651 }
5652 
5653 /// Return true if the specified node is an element that makes up a 32-bit
5654 /// packed halfword byteswap.
5655 /// ((x & 0x000000ff) << 8) |
5656 /// ((x & 0x0000ff00) >> 8) |
5657 /// ((x & 0x00ff0000) << 8) |
5658 /// ((x & 0xff000000) >> 8)
5659 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5660   if (!N.getNode()->hasOneUse())
5661     return false;
5662 
5663   unsigned Opc = N.getOpcode();
5664   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5665     return false;
5666 
5667   SDValue N0 = N.getOperand(0);
5668   unsigned Opc0 = N0.getOpcode();
5669   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5670     return false;
5671 
5672   ConstantSDNode *N1C = nullptr;
5673   // SHL or SRL: look upstream for AND mask operand
5674   if (Opc == ISD::AND)
5675     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5676   else if (Opc0 == ISD::AND)
5677     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5678   if (!N1C)
5679     return false;
5680 
5681   unsigned MaskByteOffset;
5682   switch (N1C->getZExtValue()) {
5683   default:
5684     return false;
5685   case 0xFF:       MaskByteOffset = 0; break;
5686   case 0xFF00:     MaskByteOffset = 1; break;
5687   case 0xFFFF:
5688     // In case demanded bits didn't clear the bits that will be shifted out.
5689     // This is needed for X86.
5690     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5691       MaskByteOffset = 1;
5692       break;
5693     }
5694     return false;
5695   case 0xFF0000:   MaskByteOffset = 2; break;
5696   case 0xFF000000: MaskByteOffset = 3; break;
5697   }
5698 
5699   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5700   if (Opc == ISD::AND) {
5701     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5702       // (x >> 8) & 0xff
5703       // (x >> 8) & 0xff0000
5704       if (Opc0 != ISD::SRL)
5705         return false;
5706       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5707       if (!C || C->getZExtValue() != 8)
5708         return false;
5709     } else {
5710       // (x << 8) & 0xff00
5711       // (x << 8) & 0xff000000
5712       if (Opc0 != ISD::SHL)
5713         return false;
5714       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5715       if (!C || C->getZExtValue() != 8)
5716         return false;
5717     }
5718   } else if (Opc == ISD::SHL) {
5719     // (x & 0xff) << 8
5720     // (x & 0xff0000) << 8
5721     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5722       return false;
5723     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5724     if (!C || C->getZExtValue() != 8)
5725       return false;
5726   } else { // Opc == ISD::SRL
5727     // (x & 0xff00) >> 8
5728     // (x & 0xff000000) >> 8
5729     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5730       return false;
5731     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5732     if (!C || C->getZExtValue() != 8)
5733       return false;
5734   }
5735 
5736   if (Parts[MaskByteOffset])
5737     return false;
5738 
5739   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5740   return true;
5741 }
5742 
5743 // Match 2 elements of a packed halfword bswap.
5744 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5745   if (N.getOpcode() == ISD::OR)
5746     return isBSwapHWordElement(N.getOperand(0), Parts) &&
5747            isBSwapHWordElement(N.getOperand(1), Parts);
5748 
5749   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
5750     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5751     if (!C || C->getAPIntValue() != 16)
5752       return false;
5753     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
5754     return true;
5755   }
5756 
5757   return false;
5758 }
5759 
5760 // Match this pattern:
5761 //   (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
5762 // And rewrite this to:
5763 //   (rotr (bswap A), 16)
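     // Illustrative check with A == 0xAABBCCDD:
     //   (shl A, 8) & 0xff00ff00  == 0xBB00DD00
     //   (srl A, 8) & 0x00ff00ff  == 0x00AA00CC
     //   or'd together            == 0xBBAADDCC
     //   rotr(bswap(A) == 0xDDCCBBAA, 16) == 0xBBAADDCC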
5764 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
5765                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
5766                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
5767   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
5768          "MatchBSwapHWordOrAndAnd: expecting i32");
5769   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5770     return SDValue();
5771   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
5772     return SDValue();
5773   // TODO: this is too restrictive; lifting this restriction requires more tests
5774   if (!N0->hasOneUse() || !N1->hasOneUse())
5775     return SDValue();
5776   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
5777   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
5778   if (!Mask0 || !Mask1)
5779     return SDValue();
5780   if (Mask0->getAPIntValue() != 0xff00ff00 ||
5781       Mask1->getAPIntValue() != 0x00ff00ff)
5782     return SDValue();
5783   SDValue Shift0 = N0.getOperand(0);
5784   SDValue Shift1 = N1.getOperand(0);
5785   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
5786     return SDValue();
5787   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
5788   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
5789   if (!ShiftAmt0 || !ShiftAmt1)
5790     return SDValue();
5791   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
5792     return SDValue();
5793   if (Shift0.getOperand(0) != Shift1.getOperand(0))
5794     return SDValue();
5795 
5796   SDLoc DL(N);
5797   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
5798   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
5799   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5800 }
5801 
5802 /// Match a 32-bit packed halfword bswap. That is
5803 /// ((x & 0x000000ff) << 8) |
5804 /// ((x & 0x0000ff00) >> 8) |
5805 /// ((x & 0x00ff0000) << 8) |
5806 /// ((x & 0xff000000) >> 8)
5807 /// => (rotl (bswap x), 16)
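     /// E.g. for x == 0xAABBCCDD the four masked terms OR together to
     /// 0xBBAADDCC, which equals rotl(bswap(x) == 0xDDCCBBAA, 16).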
5808 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5809   if (!LegalOperations)
5810     return SDValue();
5811 
5812   EVT VT = N->getValueType(0);
5813   if (VT != MVT::i32)
5814     return SDValue();
5815   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5816     return SDValue();
5817 
5818   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
5819                                               getShiftAmountTy(VT)))
5820     return BSwap;
5821 
5822   // Try again with commuted operands.
5823   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
5824                                               getShiftAmountTy(VT)))
5825     return BSwap;
5826 
5828   // Look for either
5829   // (or (bswaphpair), (bswaphpair))
5830   // (or (or (bswaphpair), (and)), (and))
5831   // (or (or (and), (bswaphpair)), (and))
5832   SDNode *Parts[4] = {};
5833 
5834   if (isBSwapHWordPair(N0, Parts)) {
5835     // (or (or (and), (and)), (or (and), (and)))
5836     if (!isBSwapHWordPair(N1, Parts))
5837       return SDValue();
5838   } else if (N0.getOpcode() == ISD::OR) {
5839     // (or (or (or (and), (and)), (and)), (and))
5840     if (!isBSwapHWordElement(N1, Parts))
5841       return SDValue();
5842     SDValue N00 = N0.getOperand(0);
5843     SDValue N01 = N0.getOperand(1);
5844     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
5845         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
5846       return SDValue();
5847   } else
5848     return SDValue();
5849 
5850   // Make sure the parts are all coming from the same node.
5851   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5852     return SDValue();
5853 
5854   SDLoc DL(N);
5855   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5856                               SDValue(Parts[0], 0));
5857 
5858   // The result of the bswap should be rotated by 16. If the rotate isn't
5859   // legal, do (x << 16) | (x >> 16) instead.
5860   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5861   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5862     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5863   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5864     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5865   return DAG.getNode(ISD::OR, DL, VT,
5866                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5867                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5868 }
5869 
5870 /// This contains all DAGCombine rules which reduce two values combined by
5871 /// an Or operation to a single value; \see visitANDLike().
5872 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5873   EVT VT = N1.getValueType();
5874   SDLoc DL(N);
5875 
5876   // fold (or x, undef) -> -1
5877   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5878     return DAG.getAllOnesConstant(DL, VT);
5879 
5880   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5881     return V;
5882 
5883   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5884   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5885       // Don't increase # computations.
5886       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5887     // We can only do this xform if we know that bits from X that are set in C2
5888     // but not in C1 are already zero.  Likewise for Y.
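         // E.g. (or (and X, 0xF0), (and Y, 0x0F)) -> (and (or X, Y), 0xFF)
         // when bits 0-3 of X and bits 4-7 of Y are known zero.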
5889     if (const ConstantSDNode *N0O1C =
5890         getAsNonOpaqueConstant(N0.getOperand(1))) {
5891       if (const ConstantSDNode *N1O1C =
5892           getAsNonOpaqueConstant(N1.getOperand(1))) {
5895         const APInt &LHSMask = N0O1C->getAPIntValue();
5896         const APInt &RHSMask = N1O1C->getAPIntValue();
5897 
5898         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5899             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5900           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5901                                   N0.getOperand(0), N1.getOperand(0));
5902           return DAG.getNode(ISD::AND, DL, VT, X,
5903                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5904         }
5905       }
5906     }
5907   }
5908 
5909   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5910   if (N0.getOpcode() == ISD::AND &&
5911       N1.getOpcode() == ISD::AND &&
5912       N0.getOperand(0) == N1.getOperand(0) &&
5913       // Don't increase # computations.
5914       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5915     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5916                             N0.getOperand(1), N1.getOperand(1));
5917     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5918   }
5919 
5920   return SDValue();
5921 }
5922 
5923 /// OR combines for which the commuted variant will be tried as well.
5924 static SDValue visitORCommutative(
5925     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5926   EVT VT = N0.getValueType();
5927   if (N0.getOpcode() == ISD::AND) {
5928     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5929     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5930       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5931 
5932     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5933     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5934       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5935   }
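       // Both folds rely on the identity (X & ~Y) | Y == X | Y.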
5936 
5937   return SDValue();
5938 }
5939 
5940 SDValue DAGCombiner::visitOR(SDNode *N) {
5941   SDValue N0 = N->getOperand(0);
5942   SDValue N1 = N->getOperand(1);
5943   EVT VT = N1.getValueType();
5944 
5945   // x | x --> x
5946   if (N0 == N1)
5947     return N0;
5948 
5949   // fold vector ops
5950   if (VT.isVector()) {
5951     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5952       return FoldedVOp;
5953 
5954     // fold (or x, 0) -> x, vector edition
5955     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5956       return N1;
5957     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5958       return N0;
5959 
5960     // fold (or x, -1) -> -1, vector edition
5961     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5962       // do not return N0, because an undef node may exist in N0
5963       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5964     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5965       // do not return N1, because an undef node may exist in N1
5966       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5967 
5968     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5969     // Do this only if the resulting shuffle is legal.
5970     if (isa<ShuffleVectorSDNode>(N0) &&
5971         isa<ShuffleVectorSDNode>(N1) &&
5972         // Avoid folding a node with illegal type.
5973         TLI.isTypeLegal(VT)) {
5974       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5975       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5976       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5977       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5978       // Ensure both shuffles have a zero input.
5979       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5980         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5981         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5982         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5983         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5984         bool CanFold = true;
5985         int NumElts = VT.getVectorNumElements();
5986         SmallVector<int, 4> Mask(NumElts);
5987 
5988         for (int i = 0; i != NumElts; ++i) {
5989           int M0 = SV0->getMaskElt(i);
5990           int M1 = SV1->getMaskElt(i);
5991 
5992           // Determine if either index is pointing to a zero vector.
5993           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5994           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5995 
5996           // If one element is zero and the other side is undef, keep undef.
5997           // This also handles the case that both are undef.
5998           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5999             Mask[i] = -1;
6000             continue;
6001           }
6002 
6003           // Make sure only one of the elements is zero.
6004           if (M0Zero == M1Zero) {
6005             CanFold = false;
6006             break;
6007           }
6008 
6009           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6010 
6011           // We have a zero and non-zero element. If the non-zero came from
6012           // SV0, make the index an LHS index. If it came from SV1, make it
6013           // a RHS index. We need to mod by NumElts because we don't care
6014           // which operand it came from in the original shuffles.
6015           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6016         }
6017 
6018         if (CanFold) {
6019           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6020           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6021 
6022           SDValue LegalShuffle =
6023               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6024                                           Mask, DAG);
6025           if (LegalShuffle)
6026             return LegalShuffle;
6027         }
6028       }
6029     }
6030   }
6031 
6032   // fold (or c1, c2) -> c1|c2
6033   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6034   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6035     return C;
6036 
6037   // canonicalize constant to RHS
6038   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6039      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6040     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6041 
6042   // fold (or x, 0) -> x
6043   if (isNullConstant(N1))
6044     return N0;
6045 
6046   // fold (or x, -1) -> -1
6047   if (isAllOnesConstant(N1))
6048     return N1;
6049 
6050   if (SDValue NewSel = foldBinOpIntoSelect(N))
6051     return NewSel;
6052 
6053   // fold (or x, c) -> c iff (x & ~c) == 0
6054   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6055     return N1;
6056 
6057   if (SDValue Combined = visitORLike(N0, N1, N))
6058     return Combined;
6059 
6060   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6061     return Combined;
6062 
6063   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6064   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6065     return BSwap;
6066   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6067     return BSwap;
6068 
6069   // reassociate or
6070   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6071     return ROR;
6072 
6073   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6074   // iff (c1 & c2) != 0 or c1/c2 are undef.
6075   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6076     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6077   };
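       // E.g. (or (and X, 0x0F), 0x01) -> (and (or X, 0x01), 0x0F); the
       // identity (X & c1) | c2 == (X | c2) & (c1 | c2) holds for any
       // constants.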
6078   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6079       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6080     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6081                                                  {N1, N0.getOperand(1)})) {
6082       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6083       AddToWorklist(IOR.getNode());
6084       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6085     }
6086   }
6087 
6088   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6089     return Combined;
6090   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6091     return Combined;
6092 
6093   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6094   if (N0.getOpcode() == N1.getOpcode())
6095     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6096       return V;
6097 
6098   // See if this is some rotate idiom.
6099   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6100     return Rot;
6101 
6102   if (SDValue Load = MatchLoadCombine(N))
6103     return Load;
6104 
6105   // Simplify the operands using demanded-bits information.
6106   if (SimplifyDemandedBits(SDValue(N, 0)))
6107     return SDValue(N, 0);
6108 
6109   // If OR can be rewritten into ADD, try combines based on ADD.
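       // (x | y) == (x + y) when x and y share no set bits, as no carries
       // can occur.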
6110   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6111       DAG.haveNoCommonBitsSet(N0, N1))
6112     if (SDValue Combined = visitADDLike(N))
6113       return Combined;
6114 
6115   return SDValue();
6116 }
6117 
6118 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6119   if (Op.getOpcode() == ISD::AND &&
6120       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6121     Mask = Op.getOperand(1);
6122     return Op.getOperand(0);
6123   }
6124   return Op;
6125 }
6126 
6127 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6128 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6129                             SDValue &Mask) {
6130   Op = stripConstantMask(DAG, Op, Mask);
6131   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6132     Shift = Op;
6133     return true;
6134   }
6135   return false;
6136 }
6137 
6138 /// Helper function for visitOR to extract the needed side of a rotate idiom
6139 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
6140 /// InstCombine merged some outside op with one of the shifts from
6141 /// the rotate pattern.
6142 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6143 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6144 /// patterns:
6145 ///
6146 ///   (or (add v v) (srl v bitwidth-1)):
6147 ///     expands (add v v) -> (shl v 1)
6148 ///
6149 ///   (or (mul v c0) (srl (mul v c1) c2)):
6150 ///     expands (mul v c0) -> (shl (mul v c1) c3)
6151 ///
6152 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
6153 ///     expands (udiv v c0) -> (srl (udiv v c1) c3)
6154 ///
6155 ///   (or (shl v c0) (srl (shl v c1) c2)):
6156 ///     expands (shl v c0) -> (shl (shl v c1) c3)
6157 ///
6158 ///   (or (srl v c0) (shl (srl v c1) c2)):
6159 ///     expands (srl v c0) -> (srl (srl v c1) c3)
6160 ///
6161 /// Such that in all cases, c3+c2==bitwidth(op v c1).
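     /// For example, on i32, (or (mul v 6) (srl (mul v 3) 31)) has c3 == 1 and
     /// 6 == 3 << 1, so (mul v 6) expands to (shl (mul v 3) 1), exposing a
     /// rotate by 1.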
6162 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6163                                      SDValue ExtractFrom, SDValue &Mask,
6164                                      const SDLoc &DL) {
6165   assert(OppShift && ExtractFrom && "Empty SDValue");
6166   assert(
6167       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6168       "Existing shift must be valid as a rotate half");
6169 
6170   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6171 
6172   // Value and Type of the shift.
6173   SDValue OppShiftLHS = OppShift.getOperand(0);
6174   EVT ShiftedVT = OppShiftLHS.getValueType();
6175 
6176   // Amount of the existing shift.
6177   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6178 
6179   // (add v v) -> (shl v 1)
6180   // TODO: Should this be a general DAG canonicalization?
6181   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6182       ExtractFrom.getOpcode() == ISD::ADD &&
6183       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6184       ExtractFrom.getOperand(0) == OppShiftLHS &&
6185       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6186     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6187                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6188 
6189   // Preconditions:
6190   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6191   //
6192   // Find opcode of the needed shift to be extracted from (op0 v c0).
6193   unsigned Opcode = ISD::DELETED_NODE;
6194   bool IsMulOrDiv = false;
6195   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6196   // opcode or its arithmetic (mul or udiv) variant.
6197   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6198     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6199     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6200       return false;
6201     Opcode = NeededShift;
6202     return true;
6203   };
6204   // op0 must be either the needed shift opcode or the mul/udiv equivalent
6205   // that the needed shift can be extracted from.
6206   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6207       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6208     return SDValue();
6209 
6210   // op0 must be the same opcode on both sides, have the same LHS argument,
6211   // and produce the same value type.
6212   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6213       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6214       ShiftedVT != ExtractFrom.getValueType())
6215     return SDValue();
6216 
6217   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6218   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6219   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6220   ConstantSDNode *ExtractFromCst =
6221       isConstOrConstSplat(ExtractFrom.getOperand(1));
6222   // TODO: We should handle non-uniform constant vectors for these values.
6223   // Check that we have constant values.
6224   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6225       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6226       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6227     return SDValue();
6228 
6229   // Compute the shift amount we need to extract to complete the rotate.
6230   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6231   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6232     return SDValue();
6233   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6234   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6235   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6236   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6237   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6238 
6239   // Now try extract the needed shift from the ExtractFrom op and see if the
6240   // result matches up with the existing shift's LHS op.
6241   if (IsMulOrDiv) {
6242     // Op to extract from is a mul or udiv by a constant.
6243     // Check:
6244     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6245     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6246     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6247                                                  NeededShiftAmt.getZExtValue());
6248     APInt ResultAmt;
6249     APInt Rem;
6250     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6251     if (Rem != 0 || ResultAmt != OppLHSAmt)
6252       return SDValue();
6253   } else {
6254     // Op to extract from is a shift by a constant.
6255     // Check:
6256     //      c2 - (bitwidth(op0 v c0) - c1) == c0
6257     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6258                                           ExtractFromAmt.getBitWidth()))
6259       return SDValue();
6260   }
6261 
6262   // Return the expanded shift op that should allow a rotate to be formed.
6263   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6264   EVT ResVT = ExtractFrom.getValueType();
6265   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6266   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6267 }
6268 
6269 // Return true if we can prove that, whenever Neg and Pos are both in the
6270 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6271 // for two opposing shifts shift1 and shift2 and a value X with EltSize bits:
6272 //
6273 //     (or (shift1 X, Neg), (shift2 X, Pos))
6274 //
6275 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6276 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6277 // to consider shift amounts with defined behavior.
6278 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6279                            SelectionDAG &DAG) {
6280   // If EltSize is a power of 2 then:
6281   //
6282   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6283   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6284   //
6285   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6286   // for the stronger condition:
6287   //
6288   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6289   //
6290   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6291   // we can just replace Neg with Neg' for the rest of the function.
6292   //
6293   // In other cases we check for the even stronger condition:
6294   //
6295   //     Neg == EltSize - Pos                                    [B]
6296   //
6297   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6298   // behavior if Pos == 0 (and consequently Neg == EltSize).
6299   //
6300   // We could actually use [A] whenever EltSize is a power of 2, but the
6301   // only extra cases that it would match are those uninteresting ones
6302   // where Neg and Pos are never in range at the same time.  E.g. for
6303   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6304   // as well as (sub 32, Pos), but:
6305   //
6306   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6307   //
6308   // always invokes undefined behavior for 32-bit X.
6309   //
6310   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
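       // E.g. for EltSize == 32, Neg == (and (sub 0, Pos), 31) satisfies [A],
       // since (0 - Pos) & 31 == (32 - Pos) & 31.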
6311   unsigned MaskLoBits = 0;
6312   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6313     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6314       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6315       unsigned Bits = Log2_64(EltSize);
6316       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6317           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6318         Neg = Neg.getOperand(0);
6319         MaskLoBits = Bits;
6320       }
6321     }
6322   }
6323 
6324   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6325   if (Neg.getOpcode() != ISD::SUB)
6326     return false;
6327   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6328   if (!NegC)
6329     return false;
6330   SDValue NegOp1 = Neg.getOperand(1);
6331 
6332   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6333   // Pos'.  The truncation is redundant for the purpose of the equality.
6334   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6335     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6336       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6337       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6338           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6339            MaskLoBits))
6340         Pos = Pos.getOperand(0);
6341     }
6342   }
6343 
6344   // The condition we need is now:
6345   //
6346   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6347   //
6348   // If NegOp1 == Pos then we need:
6349   //
6350   //              EltSize & Mask == NegC & Mask
6351   //
6352   // (because "x & Mask" is a truncation and distributes through subtraction).
6353   //
6354   // We also need to account for a potential truncation of NegOp1 if the amount
6355   // has already been legalized to a shift amount type.
6356   APInt Width;
6357   if ((Pos == NegOp1) ||
6358       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6359     Width = NegC->getAPIntValue();
6360 
6361   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6362   // Then the condition we want to prove becomes:
6363   //
6364   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6365   //
6366   // which, again because "x & Mask" is a truncation, becomes:
6367   //
6368   //                NegC & Mask == (EltSize - PosC) & Mask
6369   //             EltSize & Mask == (NegC + PosC) & Mask
6370   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6371     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6372       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6373     else
6374       return false;
6375   } else
6376     return false;
6377 
6378   // Now we just need to check that EltSize & Mask == Width & Mask.
6379   if (MaskLoBits)
6380     // EltSize & Mask is 0 since Mask is EltSize - 1.
6381     return Width.getLoBits(MaskLoBits) == 0;
6382   return Width == EltSize;
6383 }
6384 
6385 // A subroutine of MatchRotate used once we have found an OR of two opposite
6386 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6387 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6388 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6389 // Neg with outer conversions stripped away.
6390 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6391                                        SDValue Neg, SDValue InnerPos,
6392                                        SDValue InnerNeg, unsigned PosOpcode,
6393                                        unsigned NegOpcode, const SDLoc &DL) {
6394   // fold (or (shl x, (*ext y)),
6395   //          (srl x, (*ext (sub 32, y)))) ->
6396   //   (rotl x, y) or (rotr x, (sub 32, y))
6397   //
6398   // fold (or (shl x, (*ext (sub 32, y))),
6399   //          (srl x, (*ext y))) ->
6400   //   (rotr x, y) or (rotl x, (sub 32, y))
6401   EVT VT = Shifted.getValueType();
6402   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6403     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6404     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6405                        HasPos ? Pos : Neg);
6406   }
6407 
6408   return SDValue();
6409 }
6410 
6411 // A subroutine of MatchRotate used once we have found an OR of two opposite
6412 // shifts of N0 and N1.  If Neg == <operand size> - Pos then the OR reduces
6413 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6414 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6415 // Neg with outer conversions stripped away.
6416 // TODO: Merge with MatchRotatePosNeg.
6417 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6418                                        SDValue Neg, SDValue InnerPos,
6419                                        SDValue InnerNeg, unsigned PosOpcode,
6420                                        unsigned NegOpcode, const SDLoc &DL) {
6421   EVT VT = N0.getValueType();
6422   unsigned EltBits = VT.getScalarSizeInBits();
6423 
6424   // fold (or (shl x0, (*ext y)),
6425   //          (srl x1, (*ext (sub 32, y)))) ->
6426   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6427   //
6428   // fold (or (shl x0, (*ext (sub 32, y))),
6429   //          (srl x1, (*ext y))) ->
6430   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6431   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
6432     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6433     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6434                        HasPos ? Pos : Neg);
6435   }
6436 
6437   // Matching the shift+xor cases, we can't easily use the xor'd shift amount,
6438   // so for now just use the PosOpcode case if it's legal.
6439   // TODO: When can we use the NegOpcode case?
6440   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6441     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6442       if (Op.getOpcode() != BinOpc)
6443         return false;
6444       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6445       return Cst && (Cst->getAPIntValue() == Imm);
6446     };
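         // For a 32-bit element, (srl (srl x1, 1), (xor y, 31)) computes
         // x1 >> (32 - y) for y in [1, 31] and 0 for y == 0, which is exactly
         // the RHS contribution of (fshl x0, x1, y).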
6447 
6448     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6449     //   -> (fshl x0, x1, y)
6450     if (IsBinOpImm(N1, ISD::SRL, 1) &&
6451         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6452         InnerPos == InnerNeg.getOperand(0) &&
6453         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6454       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6455     }
6456 
6457     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6458     //   -> (fshr x0, x1, y)
6459     if (IsBinOpImm(N0, ISD::SHL, 1) &&
6460         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6461         InnerNeg == InnerPos.getOperand(0) &&
6462         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6463       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6464     }
6465 
6466     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6467     //   -> (fshr x0, x1, y)
6468     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6469     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6470         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6471         InnerNeg == InnerPos.getOperand(0) &&
6472         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6473       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6474     }
6475   }
6476 
6477   return SDValue();
6478 }
6479 
6480 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6481 // idioms for rotate, and if the target supports rotation instructions, generate
6482 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6483 // with different shifted sources.
6484 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6485   // Must be a legal type.  Expanded and promoted types won't work with rotates.
6486   EVT VT = LHS.getValueType();
6487   if (!TLI.isTypeLegal(VT))
6488     return SDValue();
6489 
6490   // The target must have at least one rotate/funnel flavor.
6491   bool HasROTL = hasOperation(ISD::ROTL, VT);
6492   bool HasROTR = hasOperation(ISD::ROTR, VT);
6493   bool HasFSHL = hasOperation(ISD::FSHL, VT);
6494   bool HasFSHR = hasOperation(ISD::FSHR, VT);
6495   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6496     return SDValue();
6497 
6498   // Check for truncated rotate.
6499   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6500       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6501     assert(LHS.getValueType() == RHS.getValueType());
6502     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6503       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6504     }
6505   }
6506 
6507   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6508   SDValue LHSShift;   // The shift.
6509   SDValue LHSMask;    // AND value if any.
6510   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6511 
6512   SDValue RHSShift;   // The shift.
6513   SDValue RHSMask;    // AND value if any.
6514   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6515 
6516   // If neither side matched a rotate half, bail
6517   if (!LHSShift && !RHSShift)
6518     return SDValue();
6519 
6520   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6521   // side of the rotate, so try to handle that here. In all cases we need to
6522   // pass the matched shift from the opposite side to compute the opcode and
6523   // needed shift amount to extract.  We still want to do this if both sides
6524   // matched a rotate half because one half may be a potential overshift that
6525   // can be broken down (ie if InstCombine merged two shl or srl ops into a
6526   // single one).
6527 
6528   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6529   if (LHSShift)
6530     if (SDValue NewRHSShift =
6531             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6532       RHSShift = NewRHSShift;
6533   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6534   if (RHSShift)
6535     if (SDValue NewLHSShift =
6536             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6537       LHSShift = NewLHSShift;
6538 
6539   // If a side is still missing, nothing else we can do.
6540   if (!RHSShift || !LHSShift)
6541     return SDValue();
6542 
6543   // At this point we've matched or extracted a shift op on each side.
6544 
6545   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6546     return SDValue(); // Shifts must disagree.
6547 
6548   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6549   if (!IsRotate && !(HasFSHL || HasFSHR))
6550     return SDValue(); // Requires funnel shift support.
6551 
6552   // Canonicalize shl to left side in a shl/srl pair.
6553   if (RHSShift.getOpcode() == ISD::SHL) {
6554     std::swap(LHS, RHS);
6555     std::swap(LHSShift, RHSShift);
6556     std::swap(LHSMask, RHSMask);
6557   }
6558 
6559   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6560   SDValue LHSShiftArg = LHSShift.getOperand(0);
6561   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6562   SDValue RHSShiftArg = RHSShift.getOperand(0);
6563   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6564 
6565   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6566   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6567   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
6568   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
6569   // iff C1+C2 == EltSizeInBits
6570   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6571                                         ConstantSDNode *RHS) {
6572     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6573   };
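       // E.g. on i32, (or (shl x, 8), (srl x, 24)) becomes (rotl x, 8) or
       // (rotr x, 24); with distinct sources, (or (shl x, 8), (srl y, 24))
       // becomes (fshl x, y, 8) or (fshr x, y, 24).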
6574   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6575     SDValue Res;
6576     if (IsRotate && (HasROTL || HasROTR))
6577       Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
6578                         HasROTL ? LHSShiftAmt : RHSShiftAmt);
6579     else
6580       Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
6581                         RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
6582 
6583     // If there is an AND of either shifted operand, apply it to the result.
6584     if (LHSMask.getNode() || RHSMask.getNode()) {
6585       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6586       SDValue Mask = AllOnes;
6587 
6588       if (LHSMask.getNode()) {
6589         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6590         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6591                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6592       }
6593       if (RHSMask.getNode()) {
6594         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6595         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6596                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6597       }
6598 
6599       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
6600     }
6601 
6602     return Res;
6603   }
6604 
6605   // If there is a mask here, and we have a variable shift, we can't be sure
6606   // that we're masking out the right bits.
6607   if (LHSMask.getNode() || RHSMask.getNode())
6608     return SDValue();
6609 
6610   // If the shift amount is sign/zero/any-extended or truncated, peel it off.
6611   SDValue LExtOp0 = LHSShiftAmt;
6612   SDValue RExtOp0 = RHSShiftAmt;
6613   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6614        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6615        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6616        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6617       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6618        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6619        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6620        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6621     LExtOp0 = LHSShiftAmt.getOperand(0);
6622     RExtOp0 = RHSShiftAmt.getOperand(0);
6623   }
6624 
6625   if (IsRotate && (HasROTL || HasROTR)) {
6626     SDValue TryL =
6627         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
6628                           RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6629     if (TryL)
6630       return TryL;
6631 
6632     SDValue TryR =
6633         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
6634                           LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6635     if (TryR)
6636       return TryR;
6637   }
6638 
6639   SDValue TryL =
6640       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6641                         LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
6642   if (TryL)
6643     return TryL;
6644 
6645   SDValue TryR =
6646       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6647                         RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
6648   if (TryR)
6649     return TryR;
6650 
6651   return SDValue();
6652 }
6653 
6654 namespace {
6655 
6656 /// Represents the known origin of an individual byte in a load combine pattern.
6657 /// The value of the byte is either constant zero or comes from memory.
6658 struct ByteProvider {
6659   // For constant zero providers Load is set to nullptr. For memory providers
6660   // Load represents the node which loads the byte from memory.
6661   // ByteOffset is the offset of the byte in the value produced by the load.
6662   LoadSDNode *Load = nullptr;
6663   unsigned ByteOffset = 0;
6664 
6665   ByteProvider() = default;
6666 
6667   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6668     return ByteProvider(Load, ByteOffset);
6669   }
6670 
6671   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6672 
6673   bool isConstantZero() const { return !Load; }
6674   bool isMemory() const { return Load; }
6675 
6676   bool operator==(const ByteProvider &Other) const {
6677     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6678   }
6679 
6680 private:
6681   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6682       : Load(Load), ByteOffset(ByteOffset) {}
6683 };
6684 
6685 } // end anonymous namespace
6686 
6687 /// Recursively traverses the expression calculating the origin of the requested
6688 /// byte of the given value. Returns None if the provider can't be calculated.
6689 ///
6690 /// For all the values except the root of the expression verifies that the value
6691 /// has exactly one use and if it's not true return None. This way if the origin
6692 /// of the byte is returned it's guaranteed that the values which contribute to
6693 /// the byte are not used outside of this expression.
6694 ///
6695 /// Because the parts of the expression are not allowed to have more than one
6696 /// use this function iterates over trees, not DAGs. So it never visits the same
6697 /// node more than once.
6698 static const Optional<ByteProvider>
6699 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6700                       bool Root = false) {
6701   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6702   if (Depth == 10)
6703     return None;
6704 
6705   if (!Root && !Op.hasOneUse())
6706     return None;
6707 
6708   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6709   unsigned BitWidth = Op.getValueSizeInBits();
6710   if (BitWidth % 8 != 0)
6711     return None;
6712   unsigned ByteWidth = BitWidth / 8;
6713   assert(Index < ByteWidth && "invalid index requested");
6714   (void) ByteWidth;
6715 
6716   switch (Op.getOpcode()) {
6717   case ISD::OR: {
6718     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6719     if (!LHS)
6720       return None;
6721     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6722     if (!RHS)
6723       return None;
6724 
6725     if (LHS->isConstantZero())
6726       return RHS;
6727     if (RHS->isConstantZero())
6728       return LHS;
6729     return None;
6730   }
6731   case ISD::SHL: {
6732     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6733     if (!ShiftOp)
6734       return None;
6735 
6736     uint64_t BitShift = ShiftOp->getZExtValue();
6737     if (BitShift % 8 != 0)
6738       return None;
6739     uint64_t ByteShift = BitShift / 8;
6740 
6741     return Index < ByteShift
6742                ? ByteProvider::getConstantZero()
6743                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6744                                        Depth + 1);
6745   }
6746   case ISD::ANY_EXTEND:
6747   case ISD::SIGN_EXTEND:
6748   case ISD::ZERO_EXTEND: {
6749     SDValue NarrowOp = Op->getOperand(0);
6750     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6751     if (NarrowBitWidth % 8 != 0)
6752       return None;
6753     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6754 
6755     if (Index >= NarrowByteWidth)
6756       return Op.getOpcode() == ISD::ZERO_EXTEND
6757                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6758                  : None;
6759     return calculateByteProvider(NarrowOp, Index, Depth + 1);
6760   }
6761   case ISD::BSWAP:
6762     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6763                                  Depth + 1);
6764   case ISD::LOAD: {
6765     auto L = cast<LoadSDNode>(Op.getNode());
6766     if (!L->isSimple() || L->isIndexed())
6767       return None;
6768 
6769     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6770     if (NarrowBitWidth % 8 != 0)
6771       return None;
6772     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6773 
6774     if (Index >= NarrowByteWidth)
6775       return L->getExtensionType() == ISD::ZEXTLOAD
6776                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6777                  : None;
6778     return ByteProvider::getMemory(L, Index);
6779   }
6780   }
6781 
6782   return None;
6783 }
6784 
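     // Map the index of a byte within a BW-byte-wide value to its offset in
     // memory: e.g. for BW = 4, byte i = 2 lives at offset 2 in a little endian
     // value and at offset 4 - 2 - 1 = 1 in a big endian value.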
6785 static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
6786   return i;
6787 }
6788 
6789 static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
6790   return BW - i - 1;
6791 }
6792 
6793 // Check if the byte offsets we are looking at match either a big or a
6794 // little endian value load. Return true for big endian, false for little
6795 // endian, and None if the match failed.
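     // For example, with FirstOffset = 0, the offsets {0, 1, 2, 3} match a
     // little endian load (returns false) and {3, 2, 1, 0} match a big endian
     // load (returns true).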
6796 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
6797                                   int64_t FirstOffset) {
6798   // Endianness can only be determined when there are at least 2 bytes.
6799   unsigned Width = ByteOffsets.size();
6800   if (Width < 2)
6801     return None;
6802 
6803   bool BigEndian = true, LittleEndian = true;
6804   for (unsigned i = 0; i < Width; i++) {
6805     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6806     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6807     BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6808     if (!BigEndian && !LittleEndian)
6809       return None;
6810   }
6811 
6812   assert((BigEndian != LittleEndian) && "It should be either big endian or "
6813                                         "little endian");
6814   return BigEndian;
6815 }
6816 
6817 static SDValue stripTruncAndExt(SDValue Value) {
6818   switch (Value.getOpcode()) {
6819   case ISD::TRUNCATE:
6820   case ISD::ZERO_EXTEND:
6821   case ISD::SIGN_EXTEND:
6822   case ISD::ANY_EXTEND:
6823     return stripTruncAndExt(Value.getOperand(0));
6824   }
6825   return Value;
6826 }
6827 
6828 /// Match a pattern where a wide type scalar value is stored by several narrow
6829 /// stores. Fold it into a single store or a BSWAP and a store if the target
6830 /// supports it.
6831 ///
6832 /// Assuming little endian target:
6833 ///  i8 *p = ...
6834 ///  i32 val = ...
6835 ///  p[0] = (val >> 0) & 0xFF;
6836 ///  p[1] = (val >> 8) & 0xFF;
6837 ///  p[2] = (val >> 16) & 0xFF;
6838 ///  p[3] = (val >> 24) & 0xFF;
6839 /// =>
6840 ///  *((i32)p) = val;
6841 ///
6842 ///  i8 *p = ...
6843 ///  i32 val = ...
6844 ///  p[0] = (val >> 24) & 0xFF;
6845 ///  p[1] = (val >> 16) & 0xFF;
6846 ///  p[2] = (val >> 8) & 0xFF;
6847 ///  p[3] = (val >> 0) & 0xFF;
6848 /// =>
6849 ///  *((i32)p) = BSWAP(val);
6850 SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6851   // Collect all the stores in the chain.
6852   SDValue Chain;
6853   SmallVector<StoreSDNode *, 8> Stores;
6854   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6855     // TODO: Allow unordered atomics when wider type is legal (see D66309)
6856     if (Store->getMemoryVT() != MVT::i8 ||
6857         !Store->isSimple() || Store->isIndexed())
6858       return SDValue();
6859     Stores.push_back(Store);
6860     Chain = Store->getChain();
6861   }
6862   // Handle the simple type only.
6863   unsigned Width = Stores.size();
6864   EVT VT = EVT::getIntegerVT(
6865     *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6866   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6867     return SDValue();
6868 
6869   if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6870     return SDValue();
6871 
6872   // Check if all the bytes of the combined value we are looking at are stored
6873   // to the same base address. Collect byte offsets from the base address into
6874   // ByteOffsets.
6875   SDValue CombinedValue;
6876   SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
6877   int64_t FirstOffset = INT64_MAX;
6878   StoreSDNode *FirstStore = nullptr;
6879   Optional<BaseIndexOffset> Base;
6880   for (auto Store : Stores) {
6881     // Each store stores a different byte of the CombinedValue. A truncate is
6882     // required to get that byte value.
6883     SDValue Trunc = Store->getValue();
6884     if (Trunc.getOpcode() != ISD::TRUNCATE)
6885       return SDValue();
6886     // A shift operation is required to get the right byte offset, except for
6887     // the first byte.
6888     int64_t Offset = 0;
6889     SDValue Value = Trunc.getOperand(0);
6890     if (Value.getOpcode() == ISD::SRL ||
6891         Value.getOpcode() == ISD::SRA) {
6892       auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
6893       // Trying to match the following pattern. The shift offset must be
6894       // a constant and a multiple of 8. It is the byte offset in "y".
6895       //
6896       // x = srl y, offset
6897       // i8 z = trunc x
6898       // store z, ...
6899       if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6900         return SDValue();
6901 
6902       Offset = ShiftOffset->getSExtValue() / 8;
6903       Value = Value.getOperand(0);
6904     }
6905 
6906     // Stores must share the same combined value with different offsets.
6907     if (!CombinedValue)
6908       CombinedValue = Value;
6909     else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6910       return SDValue();
6911 
6912     // The trunc and all the extend operations should be stripped to get the
6913     // real value being stored.
6914     else if (CombinedValue.getValueType() != VT) {
6915       if (Value.getValueType() == VT ||
6916           Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6917         CombinedValue = Value;
6918       // Give up if the combined value type is smaller than the store size.
6919       if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6920         return SDValue();
6921     }
6922 
6923     // Stores must share the same base address
6924     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6925     int64_t ByteOffsetFromBase = 0;
6926     if (!Base)
6927       Base = Ptr;
6928     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6929       return SDValue();
6930 
6931     // Remember the first byte store
6932     if (ByteOffsetFromBase < FirstOffset) {
6933       FirstStore = Store;
6934       FirstOffset = ByteOffsetFromBase;
6935     }
6936     // Map the offset in the store to the offset in the combined value, and
6937     // return early if it has been set before.
6938     if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6939       return SDValue();
6940     ByteOffsets[Offset] = ByteOffsetFromBase;
6941   }
6942 
6943   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6944   assert(FirstStore && "First store must be set");
6945 
6946   // Check if the bytes of the combined value we are looking at match either a
6947   // big or a little endian value store.
6948   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6949   if (!IsBigEndian.hasValue())
6950     return SDValue();
6951 
6952   // The node we are looking at matches the pattern; check if we can replace
6953   // it with a single bswap (if needed) and a store.
6954 
6955   // If the store needs a byte swap, check if the target supports it.
6956   bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6957 
6958   // Before legalization we can introduce illegal bswaps which will later be
6959   // converted to an explicit bswap sequence. This way we end up with a single
6960   // store and byte shuffling instead of several stores and byte shuffling.
6961   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6962     return SDValue();
6963 
6964   // Check that a store of the wide type is both allowed and fast on the target
6965   bool Fast = false;
6966   bool Allowed =
6967       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6968                              *FirstStore->getMemOperand(), &Fast);
6969   if (!Allowed || !Fast)
6970     return SDValue();
6971 
6972   if (VT != CombinedValue.getValueType()) {
6973     assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6974            "Get unexpected store value to combine");
6975     CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, CombinedValue);
6977   }
6978 
6979   if (NeedsBswap)
6980     CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6981 
6982   SDValue NewStore =
6983     DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
6984                  FirstStore->getPointerInfo(), FirstStore->getAlignment());
6985 
6986   // Rely on other DAG combine rules to remove the other individual stores.
6987   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6988   return NewStore;
6989 }
6990 
6991 /// Match a pattern where a wide type scalar value is loaded by several narrow
6992 /// loads and combined by shifts and ors. Fold it into a single load or a load
6993 /// and a BSWAP if the target supports it.
6994 ///
6995 /// Assuming little endian target:
6996 ///  i8 *a = ...
6997 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6998 /// =>
6999 ///  i32 val = *((i32)a)
7000 ///
7001 ///  i8 *a = ...
7002 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7003 /// =>
7004 ///  i32 val = BSWAP(*((i32)a))
7005 ///
7006 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7007 /// interact well with the worklist mechanism. When a part of the pattern is
7008 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7009 /// but the root node of the pattern which triggers the load combine is not
7010 /// necessarily a direct user of the changed node. For example, once the address
7011 /// of t28 load is reassociated load combine won't be triggered:
7012 ///             t25: i32 = add t4, Constant:i32<2>
7013 ///           t26: i64 = sign_extend t25
7014 ///        t27: i64 = add t2, t26
7015 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7016 ///     t29: i32 = zero_extend t28
7017 ///   t32: i32 = shl t29, Constant:i8<8>
7018 /// t33: i32 = or t23, t32
7019 /// As a possible fix, visitLoad can check if the load can be part of a load
7020 /// combine pattern and add corresponding OR roots to the worklist.
7021 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7022   assert(N->getOpcode() == ISD::OR &&
7023          "Can only match load combining against OR nodes");
7024 
7025   // Handles simple types only
7026   EVT VT = N->getValueType(0);
7027   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7028     return SDValue();
7029   unsigned ByteWidth = VT.getSizeInBits() / 8;
7030 
7031   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7032   auto MemoryByteOffset = [&] (ByteProvider P) {
7033     assert(P.isMemory() && "Must be a memory byte provider");
7034     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7035     assert(LoadBitWidth % 8 == 0 &&
7036            "can only analyze providers for individual bytes not bit");
7037     unsigned LoadByteWidth = LoadBitWidth / 8;
7038     return IsBigEndianTarget
7039             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
7040             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
7041   };
7042 
7043   Optional<BaseIndexOffset> Base;
7044   SDValue Chain;
7045 
7046   SmallPtrSet<LoadSDNode *, 8> Loads;
7047   Optional<ByteProvider> FirstByteProvider;
7048   int64_t FirstOffset = INT64_MAX;
7049 
7050   // Check if all the bytes of the OR we are looking at are loaded from the same
7051   // base address. Collect byte offsets from the base address in ByteOffsets.
7052   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7053   unsigned ZeroExtendedBytes = 0;
7054   for (int i = ByteWidth - 1; i >= 0; --i) {
7055     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7056     if (!P)
7057       return SDValue();
7058 
7059     if (P->isConstantZero()) {
7060       // It's OK for the N most significant bytes to be 0; we can just
7061       // zero-extend the load.
7062       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7063         return SDValue();
7064       continue;
7065     }
7066     assert(P->isMemory() && "provenance should either be memory or zero");
7067 
7068     LoadSDNode *L = P->Load;
7069     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7070            !L->isIndexed() &&
7071            "Must be enforced by calculateByteProvider");
7072     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7073 
7074     // All loads must share the same chain
7075     SDValue LChain = L->getChain();
7076     if (!Chain)
7077       Chain = LChain;
7078     else if (Chain != LChain)
7079       return SDValue();
7080 
7081     // Loads must share the same base address
7082     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7083     int64_t ByteOffsetFromBase = 0;
7084     if (!Base)
7085       Base = Ptr;
7086     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7087       return SDValue();
7088 
7089     // Calculate the offset of the current byte from the base address
7090     ByteOffsetFromBase += MemoryByteOffset(*P);
7091     ByteOffsets[i] = ByteOffsetFromBase;
7092 
7093     // Remember the first byte load
7094     if (ByteOffsetFromBase < FirstOffset) {
7095       FirstByteProvider = P;
7096       FirstOffset = ByteOffsetFromBase;
7097     }
7098 
7099     Loads.insert(L);
7100   }
7101   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7102          "memory, so there must be at least one load which produces the value");
7103   assert(Base && "Base address of the accessed memory location must be set");
7104   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7105 
7106   bool NeedsZext = ZeroExtendedBytes > 0;
7107 
7108   EVT MemVT =
7109       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7110 
7111   if (!MemVT.isSimple())
7112     return SDValue();
7113 
7114   // Before legalization we can introduce illegal loads that are too wide and
7115   // will later be split into legal-sized loads. This enables us to combine
7116   // i64-by-i8 load patterns into a couple of i32 loads on 32-bit targets.
7117   if (LegalOperations &&
7118       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7119                             MemVT))
7120     return SDValue();
7121 
7122   // Check if the bytes of the OR we are looking at match either a big or a
7123   // little endian value load.
7124   Optional<bool> IsBigEndian = isBigEndian(
7125       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7126   if (!IsBigEndian.hasValue())
7127     return SDValue();
7128 
7129   assert(FirstByteProvider && "must be set");
7130 
7131   // Ensure that the first byte is loaded from offset zero of the first load,
7132   // so the combined value can be loaded from the first load's address.
7133   if (MemoryByteOffset(*FirstByteProvider) != 0)
7134     return SDValue();
7135   LoadSDNode *FirstLoad = FirstByteProvider->Load;
7136 
7137   // The node we are looking at matches the pattern; check if we can replace
7138   // it with a single (possibly zero-extended) load and a bswap + shift if
7139   // needed.
7140 
7141   // If the load needs a byte swap, check if the target supports it.
7142   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7143 
7144   // Before legalization we can introduce illegal bswaps which will later be
7145   // converted to an explicit bswap sequence. This way we end up with a single
7146   // load and byte shuffling instead of several loads and byte shuffling.
7147   // We do not introduce illegal bswaps when zero-extending as this tends to
7148   // introduce too many arithmetic instructions.
7149   if (NeedsBswap && (LegalOperations || NeedsZext) &&
7150       !TLI.isOperationLegal(ISD::BSWAP, VT))
7151     return SDValue();
7152 
7153   // If we need to bswap and zero extend, we have to insert a shift. Check that
7154   // it is legal.
7155   if (NeedsBswap && NeedsZext && LegalOperations &&
7156       !TLI.isOperationLegal(ISD::SHL, VT))
7157     return SDValue();
7158 
7159   // Check that a load of the wide type is both allowed and fast on the target
7160   bool Fast = false;
7161   bool Allowed =
7162       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7163                              *FirstLoad->getMemOperand(), &Fast);
7164   if (!Allowed || !Fast)
7165     return SDValue();
7166 
7167   SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7168                                    SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
7169                                    FirstLoad->getPointerInfo(), MemVT,
7170                                    FirstLoad->getAlignment());
7171 
7172   // Transfer chain users from old loads to the new load.
7173   for (LoadSDNode *L : Loads)
7174     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7175 
7176   if (!NeedsBswap)
7177     return NewLoad;
7178 
7179   SDValue ShiftedLoad =
7180       NeedsZext
7181           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7182                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7183                                                    SDLoc(N), LegalOperations))
7184           : NewLoad;
7185   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7186 }
7187 
7188 // If the target has andn, bsl, or a similar bit-select instruction,
7189 // we want to unfold masked merge, with canonical pattern of:
7190 //   |        A  |  |B|
7191 //   ((x ^ y) & m) ^ y
7192 //    |  D  |
7193 // Into:
7194 //   (x & m) | (y & ~m)
7195 // If y is a constant, and the 'andn' does not work with immediates,
7196 // we unfold into a different pattern:
7197 //   ~(~x & m) & (m | y)
7198 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7199 //       the very least that breaks andnpd / andnps patterns, and because those
7200 //       patterns are simplified in IR and shouldn't be created in the DAG
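     // As a quick check of the equivalence, take x = 0b1100, y = 0b1010 and
     // m = 0b0110: ((x ^ y) & m) ^ y = (0b0110 & 0b0110) ^ 0b1010 = 0b1100,
     // and (x & m) | (y & ~m) = 0b0100 | 0b1000 = 0b1100 as well.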
7201 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7202   assert(N->getOpcode() == ISD::XOR);
7203 
7204   // Don't touch 'not' (i.e. where y = -1).
7205   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7206     return SDValue();
7207 
7208   EVT VT = N->getValueType(0);
7209 
7210   // There are 3 commutable operators in the pattern,
7211   // so we have to deal with 8 possible variants of the basic pattern.
7212   SDValue X, Y, M;
7213   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7214     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7215       return false;
7216     SDValue Xor = And.getOperand(XorIdx);
7217     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7218       return false;
7219     SDValue Xor0 = Xor.getOperand(0);
7220     SDValue Xor1 = Xor.getOperand(1);
7221     // Don't touch 'not' (i.e. where y = -1).
7222     if (isAllOnesOrAllOnesSplat(Xor1))
7223       return false;
7224     if (Other == Xor0)
7225       std::swap(Xor0, Xor1);
7226     if (Other != Xor1)
7227       return false;
7228     X = Xor0;
7229     Y = Xor1;
7230     M = And.getOperand(XorIdx ? 0 : 1);
7231     return true;
7232   };
7233 
7234   SDValue N0 = N->getOperand(0);
7235   SDValue N1 = N->getOperand(1);
7236   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7237       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7238     return SDValue();
7239 
7240   // Don't do anything if the mask is constant. This should not be reachable.
7241   // InstCombine should have already unfolded this pattern, and DAGCombiner
7242   // probably shouldn't produce it either.
7243   if (isa<ConstantSDNode>(M.getNode()))
7244     return SDValue();
7245 
7246   // We can transform if the target has AndNot
7247   if (!TLI.hasAndNot(M))
7248     return SDValue();
7249 
7250   SDLoc DL(N);
7251 
7252   // If Y is a constant, check that 'andn' works with immediates.
7253   if (!TLI.hasAndNot(Y)) {
7254     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7255     // If not, we need to do a bit more work to make sure andn is still used.
7256     SDValue NotX = DAG.getNOT(DL, X, VT);
7257     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7258     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7259     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7260     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7261   }
7262 
7263   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7264   SDValue NotM = DAG.getNOT(DL, M, VT);
7265   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7266 
7267   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7268 }
7269 
7270 SDValue DAGCombiner::visitXOR(SDNode *N) {
7271   SDValue N0 = N->getOperand(0);
7272   SDValue N1 = N->getOperand(1);
7273   EVT VT = N0.getValueType();
7274 
7275   // fold vector ops
7276   if (VT.isVector()) {
7277     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7278       return FoldedVOp;
7279 
7280     // fold (xor x, 0) -> x, vector edition
7281     if (ISD::isBuildVectorAllZeros(N0.getNode()))
7282       return N1;
7283     if (ISD::isBuildVectorAllZeros(N1.getNode()))
7284       return N0;
7285   }
7286 
7287   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7288   SDLoc DL(N);
7289   if (N0.isUndef() && N1.isUndef())
7290     return DAG.getConstant(0, DL, VT);
7291 
7292   // fold (xor x, undef) -> undef
7293   if (N0.isUndef())
7294     return N0;
7295   if (N1.isUndef())
7296     return N1;
7297 
7298   // fold (xor c1, c2) -> c1^c2
7299   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7300     return C;
7301 
7302   // canonicalize constant to RHS
7303   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7304      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7305     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7306 
7307   // fold (xor x, 0) -> x
7308   if (isNullConstant(N1))
7309     return N0;
7310 
7311   if (SDValue NewSel = foldBinOpIntoSelect(N))
7312     return NewSel;
7313 
7314   // reassociate xor
7315   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7316     return RXOR;
7317 
7318   // fold !(x cc y) -> (x !cc y)
7319   unsigned N0Opcode = N0.getOpcode();
7320   SDValue LHS, RHS, CC;
7321   if (TLI.isConstTrueVal(N1.getNode()) &&
7322       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7323     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7324                                                LHS.getValueType());
7325     if (!LegalOperations ||
7326         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7327       switch (N0Opcode) {
7328       default:
7329         llvm_unreachable("Unhandled SetCC Equivalent!");
7330       case ISD::SETCC:
7331         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7332       case ISD::SELECT_CC:
7333         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7334                                N0.getOperand(3), NotCC);
7335       case ISD::STRICT_FSETCC:
7336       case ISD::STRICT_FSETCCS: {
7337         if (N0.hasOneUse()) {
7338           // FIXME Can we handle multiple uses? Could we token factor the chain
7339           // results from the new/old setcc?
7340           SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7341                                        N0.getOperand(0),
7342                                        N0Opcode == ISD::STRICT_FSETCCS);
7343           CombineTo(N, SetCC);
7344           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7345           recursivelyDeleteUnusedNodes(N0.getNode());
7346           return SDValue(N, 0); // Return N so it doesn't get rechecked!
7347         }
7348         break;
7349       }
7350       }
7351     }
7352   }
7353 
7354   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7355   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7356       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7357     SDValue V = N0.getOperand(0);
7358     SDLoc DL0(N0);
7359     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7360                     DAG.getConstant(1, DL0, V.getValueType()));
7361     AddToWorklist(V.getNode());
7362     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7363   }
7364 
7365   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7366   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7367       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7368     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7369     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7370       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7371       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7372       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7373       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7374       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7375     }
7376   }
7377   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7378   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7379       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7380     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7381     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7382       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7383       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7384       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7385       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7386       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7387     }
7388   }
7389 
7390   // fold (not (neg x)) -> (add X, -1)
7391   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7392   // Y is a constant or the subtract has a single use.
7393   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7394       isNullConstant(N0.getOperand(0))) {
7395     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7396                        DAG.getAllOnesConstant(DL, VT));
7397   }
7398 
7399   // fold (not (add X, -1)) -> (neg X)
7400   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7401       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7402     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7403                        N0.getOperand(0));
7404   }
7405 
7406   // fold (xor (and x, y), y) -> (and (not x), y)
7407   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7408     SDValue X = N0.getOperand(0);
7409     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7410     AddToWorklist(NotX.getNode());
7411     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7412   }
7413 
7414   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7415     ConstantSDNode *XorC = isConstOrConstSplat(N1);
7416     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7417     unsigned BitWidth = VT.getScalarSizeInBits();
7418     if (XorC && ShiftC) {
7419       // Don't crash on an oversized shift. We cannot guarantee that a bogus
7420       // shift has been simplified to undef.
7421       uint64_t ShiftAmt = ShiftC->getLimitedValue();
7422       if (ShiftAmt < BitWidth) {
7423         APInt Ones = APInt::getAllOnesValue(BitWidth);
7424         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7425         if (XorC->getAPIntValue() == Ones) {
7426           // If the xor constant is a shifted -1, do a 'not' before the shift:
7427           // xor (X << ShiftC), XorC --> (not X) << ShiftC
7428           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7429           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7430           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7431         }
7432       }
7433     }
7434   }
7435 
7436   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7437   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7438     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7439     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7440     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7441       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7442       SDValue S0 = S.getOperand(0);
7443       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
7444         unsigned OpSizeInBits = VT.getScalarSizeInBits();
7445         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7446           if (C->getAPIntValue() == (OpSizeInBits - 1))
7447             return DAG.getNode(ISD::ABS, DL, VT, S0);
7448       }
7449     }
7450   }
7451 
7452   // fold (xor x, x) -> 0
7453   if (N0 == N1)
7454     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7455 
7456   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7457   // Here is a concrete example of this equivalence:
7458   // i16   x ==  14
7459   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7460   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7461   //
7462   // =>
7463   //
7464   // i16     ~1      == 0b1111111111111110
7465   // i16 rol(~1, 14) == 0b1011111111111111
7466   //
7467   // Some additional tips to help conceptualize this transform:
7468   // - Try to see the operation as placing a single zero in a value of all ones.
7469   // - There exists no value for x which would allow the result to contain zero.
7470   // - Values of x larger than the bitwidth are undefined and do not require a
7471   //   consistent result.
7472   // - Pushing the zero left requires shifting one-bits in from the right.
7473   // A rotate left of ~1 is a nice way of achieving the desired result.
7474   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7475       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7476     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7477                        N0.getOperand(1));
7478   }
7479 
7480   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7481   if (N0Opcode == N1.getOpcode())
7482     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7483       return V;
7484 
7485   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7486   if (SDValue MM = unfoldMaskedMerge(N))
7487     return MM;
7488 
7489   // Simplify the expression using non-local knowledge.
7490   if (SimplifyDemandedBits(SDValue(N, 0)))
7491     return SDValue(N, 0);
7492 
7493   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7494     return Combined;
7495 
7496   return SDValue();
7497 }
7498 
7499 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7500 /// shift-by-constant operand with identical opcode, we may be able to convert
7501 /// that into 2 independent shifts followed by the logic op. This is a
7502 /// throughput improvement.
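     /// For example: (shl (xor (shl X, 2), Y), 3) becomes
     /// (xor (shl X, 5), (shl Y, 3)), where the two new shifts are independent.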
7503 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7504   // Match a one-use bitwise logic op.
7505   SDValue LogicOp = Shift->getOperand(0);
7506   if (!LogicOp.hasOneUse())
7507     return SDValue();
7508 
7509   unsigned LogicOpcode = LogicOp.getOpcode();
7510   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7511       LogicOpcode != ISD::XOR)
7512     return SDValue();
7513 
7514   // Find a matching one-use shift by constant.
7515   unsigned ShiftOpcode = Shift->getOpcode();
7516   SDValue C1 = Shift->getOperand(1);
7517   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7518   assert(C1Node && "Expected a shift with constant operand");
7519   const APInt &C1Val = C1Node->getAPIntValue();
7520   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7521                              const APInt *&ShiftAmtVal) {
7522     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7523       return false;
7524 
7525     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7526     if (!ShiftCNode)
7527       return false;
7528 
7529     // Capture the shifted operand and shift amount value.
7530     ShiftOp = V.getOperand(0);
7531     ShiftAmtVal = &ShiftCNode->getAPIntValue();
7532 
7533     // Shift amount types do not have to match their operand type, so check that
7534     // the constants are the same width.
7535     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7536       return false;
7537 
7538     // The fold is not valid if the sum of the shift values exceeds bitwidth.
7539     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7540       return false;
7541 
7542     return true;
7543   };
7544 
7545   // Logic ops are commutative, so check each operand for a match.
7546   SDValue X, Y;
7547   const APInt *C0Val;
7548   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7549     Y = LogicOp.getOperand(1);
7550   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7551     Y = LogicOp.getOperand(0);
7552   else
7553     return SDValue();
7554 
7555   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7556   SDLoc DL(Shift);
7557   EVT VT = Shift->getValueType(0);
7558   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7559   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7560   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7561   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7562   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7563 }
7564 
7565 /// Handle transforms common to the three shifts, when the shift amount is a
7566 /// constant.
7567 /// We are looking for: (shift being one of shl/sra/srl)
7568 ///   shift (binop X, C0), C1
7569 /// And want to transform into:
7570 ///   binop (shift X, C1), (shift C0, C1)
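     /// For example: (shl (or X, 0xF0), 4) -> (or (shl X, 4), 0xF00).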
7571 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7572   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7573 
7574   // Do not turn a 'not' into a regular xor.
7575   if (isBitwiseNot(N->getOperand(0)))
7576     return SDValue();
7577 
7578   // The inner binop must be one-use, since we want to replace it.
7579   SDValue LHS = N->getOperand(0);
7580   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7581     return SDValue();
7582 
7583   // TODO: This is limited to early combining because it may reveal regressions
7584   //       otherwise. But since we just checked a target hook to see if this is
7585   //       desirable, that should have filtered out cases where this interferes
7586   //       with some other pattern matching.
7587   if (!LegalTypes)
7588     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7589       return R;
7590 
7591   // We want to pull some binops through shifts, so that we have (and (shift))
7592   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
7593   // thing happens with address calculations, so it's important to canonicalize
7594   // it.
7595   switch (LHS.getOpcode()) {
7596   default:
7597     return SDValue();
7598   case ISD::OR:
7599   case ISD::XOR:
7600   case ISD::AND:
7601     break;
7602   case ISD::ADD:
7603     if (N->getOpcode() != ISD::SHL)
7604       return SDValue(); // only shl(add) not sr[al](add).
7605     break;
7606   }
7607 
7608   // We require the RHS of the binop to be a constant and not opaque as well.
7609   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7610   if (!BinOpCst)
7611     return SDValue();
7612 
7613   // FIXME: disable this unless the input to the binop is a shift by a constant
7614   // or is copy/select. Enable this in other cases once we figure out when it
7615   // is exactly profitable.
7616   SDValue BinOpLHSVal = LHS.getOperand(0);
7617   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7618                             BinOpLHSVal.getOpcode() == ISD::SRA ||
7619                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
7620                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7621   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7622                         BinOpLHSVal.getOpcode() == ISD::SELECT;
7623 
7624   if (!IsShiftByConstant && !IsCopyOrSelect)
7625     return SDValue();
7626 
7627   if (IsCopyOrSelect && N->hasOneUse())
7628     return SDValue();
7629 
7630   // Fold the constants, shifting the binop RHS by the shift amount.
7631   SDLoc DL(N);
7632   EVT VT = N->getValueType(0);
7633   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
7634                                N->getOperand(1));
7635   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7636 
7637   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
7638                                  N->getOperand(1));
7639   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
7640 }
7641 
7642 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7643   assert(N->getOpcode() == ISD::TRUNCATE);
7644   assert(N->getOperand(0).getOpcode() == ISD::AND);
7645 
7646   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7647   EVT TruncVT = N->getValueType(0);
7648   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7649       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7650     SDValue N01 = N->getOperand(0).getOperand(1);
7651     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7652       SDLoc DL(N);
7653       SDValue N00 = N->getOperand(0).getOperand(0);
7654       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7655       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7656       AddToWorklist(Trunc00.getNode());
7657       AddToWorklist(Trunc01.getNode());
7658       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7659     }
7660   }
7661 
7662   return SDValue();
7663 }
7664 
7665 SDValue DAGCombiner::visitRotate(SDNode *N) {
7666   SDLoc dl(N);
7667   SDValue N0 = N->getOperand(0);
7668   SDValue N1 = N->getOperand(1);
7669   EVT VT = N->getValueType(0);
7670   unsigned Bitsize = VT.getScalarSizeInBits();
7671 
7672   // fold (rot x, 0) -> x
7673   if (isNullOrNullSplat(N1))
7674     return N0;
7675 
7676   // fold (rot x, c) -> x iff (c % BitSize) == 0
7677   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7678     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7679     if (DAG.MaskedValueIsZero(N1, ModuloMask))
7680       return N0;
7681   }
7682 
7683   // fold (rot x, c) -> (rot x, c % BitSize)
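       // For example: (rotl i8 x, 10) -> (rotl i8 x, 2).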
7684   bool OutOfRange = false;
7685   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
7686     OutOfRange |= C->getAPIntValue().uge(Bitsize);
7687     return true;
7688   };
7689   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
7690     EVT AmtVT = N1.getValueType();
7691     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
7692     if (SDValue Amt =
7693             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
7694       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
7695   }
7696 
7697   // rot i16 X, 8 --> bswap X
7698   auto *RotAmtC = isConstOrConstSplat(N1);
7699   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
7700       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
7701     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
7702 
7703   // Simplify the operands using demanded-bits information.
7704   if (SimplifyDemandedBits(SDValue(N, 0)))
7705     return SDValue(N, 0);
7706 
7707   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7708   if (N1.getOpcode() == ISD::TRUNCATE &&
7709       N1.getOperand(0).getOpcode() == ISD::AND) {
7710     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7711       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7712   }
7713 
7714   unsigned NextOp = N0.getOpcode();
7715   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
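       // For example, for i8: (rotl (rotl x, 4), 6) -> (rotl x, 2) since
       // (4 + 6) % 8 == 2, and (rotl (rotr x, 4), 6) -> (rotl x, 2) as well.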
7716   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7717     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7718     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7719     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7720       EVT ShiftVT = C1->getValueType(0);
7721       bool SameSide = (N->getOpcode() == NextOp);
7722       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7723       if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
7724               CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
7725         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7726         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
7727             ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
7728         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
7729                            CombinedShiftNorm);
7730       }
7731     }
7732   }
7733   return SDValue();
7734 }
7735 
7736 SDValue DAGCombiner::visitSHL(SDNode *N) {
7737   SDValue N0 = N->getOperand(0);
7738   SDValue N1 = N->getOperand(1);
7739   if (SDValue V = DAG.simplifyShift(N0, N1))
7740     return V;
7741 
7742   EVT VT = N0.getValueType();
7743   EVT ShiftVT = N1.getValueType();
7744   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7745 
7746   // fold vector ops
7747   if (VT.isVector()) {
7748     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7749       return FoldedVOp;
7750 
7751     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7752     // If setcc produces an all-ones true value then:
7753     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7754     if (N1CV && N1CV->isConstant()) {
7755       if (N0.getOpcode() == ISD::AND) {
7756         SDValue N00 = N0->getOperand(0);
7757         SDValue N01 = N0->getOperand(1);
7758         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
7759 
7760         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7761             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
7762                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
7763           if (SDValue C =
7764                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
7765             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7766         }
7767       }
7768     }
7769   }
7770 
7771   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7772 
7773   // fold (shl c1, c2) -> c1<<c2
7774   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
7775     return C;
7776 
7777   if (SDValue NewSel = foldBinOpIntoSelect(N))
7778     return NewSel;
7779 
7780   // if (shl x, c) is known to be zero, return 0
7781   if (DAG.MaskedValueIsZero(SDValue(N, 0),
7782                             APInt::getAllOnesValue(OpSizeInBits)))
7783     return DAG.getConstant(0, SDLoc(N), VT);
7784 
7785   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7786   if (N1.getOpcode() == ISD::TRUNCATE &&
7787       N1.getOperand(0).getOpcode() == ISD::AND) {
7788     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7789       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7790   }
7791 
7792   if (SimplifyDemandedBits(SDValue(N, 0)))
7793     return SDValue(N, 0);
7794 
7795   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
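       // For example, for i32: (shl (shl x, 10), 6) -> (shl x, 16), while
       // (shl (shl x, 20), 16) -> 0 because 20 + 16 >= 32.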
7796   if (N0.getOpcode() == ISD::SHL) {
7797     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7798                                           ConstantSDNode *RHS) {
7799       APInt c1 = LHS->getAPIntValue();
7800       APInt c2 = RHS->getAPIntValue();
7801       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7802       return (c1 + c2).uge(OpSizeInBits);
7803     };
7804     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7805       return DAG.getConstant(0, SDLoc(N), VT);
7806 
7807     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7808                                        ConstantSDNode *RHS) {
7809       APInt c1 = LHS->getAPIntValue();
7810       APInt c2 = RHS->getAPIntValue();
7811       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7812       return (c1 + c2).ult(OpSizeInBits);
7813     };
7814     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7815       SDLoc DL(N);
7816       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7817       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7818     }
7819   }
7820 
7821   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7822   // For this to be valid, the second form must not preserve any of the bits
7823   // that are shifted out by the inner shift in the first form.  This means
7824   // the outer shift size must be >= the number of bits added by the ext.
7825   // As a corollary, we don't care what kind of ext it is.
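       // For example, for i8 x zero-extended to i32:
       //   (shl (zext (shl x, 2)), 24) -> (shl (zext x), 26)
       // since the outer shift amount of 24 covers the 24 bits added by the ext.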
7826   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7827        N0.getOpcode() == ISD::ANY_EXTEND ||
7828        N0.getOpcode() == ISD::SIGN_EXTEND) &&
7829       N0.getOperand(0).getOpcode() == ISD::SHL) {
7830     SDValue N0Op0 = N0.getOperand(0);
7831     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7832     EVT InnerVT = N0Op0.getValueType();
7833     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
7834 
7835     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7836                                                          ConstantSDNode *RHS) {
7837       APInt c1 = LHS->getAPIntValue();
7838       APInt c2 = RHS->getAPIntValue();
7839       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7840       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7841              (c1 + c2).uge(OpSizeInBits);
7842     };
7843     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7844                                   /*AllowUndefs*/ false,
7845                                   /*AllowTypeMismatch*/ true))
7846       return DAG.getConstant(0, SDLoc(N), VT);
7847 
7848     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7849                                                       ConstantSDNode *RHS) {
7850       APInt c1 = LHS->getAPIntValue();
7851       APInt c2 = RHS->getAPIntValue();
7852       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7853       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7854              (c1 + c2).ult(OpSizeInBits);
7855     };
7856     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7857                                   /*AllowUndefs*/ false,
7858                                   /*AllowTypeMismatch*/ true)) {
7859       SDLoc DL(N);
7860       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
7861       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7862       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7863       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7864     }
7865   }
7866 
7867   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7868   // Only fold this if the inner zext has no other uses to avoid increasing
7869   // the total number of instructions.
7870   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7871       N0.getOperand(0).getOpcode() == ISD::SRL) {
7872     SDValue N0Op0 = N0.getOperand(0);
7873     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7874 
7875     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7876       APInt c1 = LHS->getAPIntValue();
7877       APInt c2 = RHS->getAPIntValue();
7878       zeroExtendToMatch(c1, c2);
7879       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7880     };
7881     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7882                                   /*AllowUndefs*/ false,
7883                                   /*AllowTypeMismatch*/ true)) {
7884       SDLoc DL(N);
7885       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7886       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7887       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7888       AddToWorklist(NewSHL.getNode());
7889       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7890     }
7891   }
7892 
7893   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
7894   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
7895   // TODO - support non-uniform vector shift amounts.
7896   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7897       N0->getFlags().hasExact()) {
7898     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7899       uint64_t C1 = N0C1->getZExtValue();
7900       uint64_t C2 = N1C->getZExtValue();
7901       SDLoc DL(N);
7902       if (C1 <= C2)
7903         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7904                            DAG.getConstant(C2 - C1, DL, ShiftVT));
7905       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7906                          DAG.getConstant(C1 - C2, DL, ShiftVT));
7907     }
7908   }
7909 
7910   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
7911   //                               (and (srl x, (sub c1, c2)), MASK)
7912   // Only fold this if the inner shift has no other uses -- if it does, folding
7913   // this will increase the total number of instructions.
7914   // TODO - drop hasOneUse requirement if c1 == c2?
7915   // TODO - support non-uniform vector shift amounts.
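       // For example, with i8 values: (shl (srl x, 3), 5) -> (and (shl x, 2), 0xE0).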
7916   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7917       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
7918     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7919       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7920         uint64_t c1 = N0C1->getZExtValue();
7921         uint64_t c2 = N1C->getZExtValue();
7922         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7923         SDValue Shift;
7924         if (c2 > c1) {
7925           Mask <<= c2 - c1;
7926           SDLoc DL(N);
7927           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7928                               DAG.getConstant(c2 - c1, DL, ShiftVT));
7929         } else {
7930           Mask.lshrInPlace(c1 - c2);
7931           SDLoc DL(N);
7932           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7933                               DAG.getConstant(c1 - c2, DL, ShiftVT));
7934         }
7935         SDLoc DL(N0);
7936         return DAG.getNode(ISD::AND, DL, VT, Shift,
7937                            DAG.getConstant(Mask, DL, VT));
7938       }
7939     }
7940   }
7941 
7942   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
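  // For example, with i8 x and c1 = 3:
  //   shl (sra x, 3), 3 --> and x, 0xF8
  // since (shl -1, 3) = 0xF8 masks off the low 3 bits.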
7943   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7944       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7945     SDLoc DL(N);
7946     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7947     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7948     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7949   }
7950 
7951   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7952   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7953   // Variant of version done on multiply, except mul by a power of 2 is turned
7954   // into a shift.
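  // For example, with i32 x:
  //   shl (add x, 5), 2 --> add (shl x, 2), 20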
7955   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7956       N0.getNode()->hasOneUse() &&
7957       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7958       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7959       TLI.isDesirableToCommuteWithShift(N, Level)) {
7960     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7961     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7962     AddToWorklist(Shl0.getNode());
7963     AddToWorklist(Shl1.getNode());
7964     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7965   }
7966 
7967   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7968   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7969       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7970       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7971     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7972     if (isConstantOrConstantVector(Shl))
7973       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7974   }
7975 
7976   if (N1C && !N1C->isOpaque())
7977     if (SDValue NewSHL = visitShiftByConstant(N))
7978       return NewSHL;
7979 
7980   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
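  // For example: shl (vscale * 4), 1 --> vscale * 8.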
7981   if (N0.getOpcode() == ISD::VSCALE)
7982     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
7983       auto DL = SDLoc(N);
7984       APInt C0 = N0.getConstantOperandAPInt(0);
7985       APInt C1 = NC1->getAPIntValue();
7986       return DAG.getVScale(DL, VT, C0 << C1);
7987     }
7988 
7989   return SDValue();
7990 }
7991 
7992 // Transform a right shift of a multiply into a multiply-high.
7993 // Examples:
7994 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
7995 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
7996 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
7997                                   const TargetLowering &TLI) {
7998   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
7999          "SRL or SRA node is required here!");
8000 
8001   // Check the shift amount. Proceed with the transformation if the shift
8002   // amount is constant.
8003   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8004   if (!ShiftAmtSrc)
8005     return SDValue();
8006 
8007   SDLoc DL(N);
8008 
8009   // The operation feeding into the shift must be a multiply.
8010   SDValue ShiftOperand = N->getOperand(0);
8011   if (ShiftOperand.getOpcode() != ISD::MUL)
8012     return SDValue();
8013 
8014   // Both operands must be equivalent extend nodes.
8015   SDValue LeftOp = ShiftOperand.getOperand(0);
8016   SDValue RightOp = ShiftOperand.getOperand(1);
8017   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8018   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8019 
8020   if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8021     return SDValue();
8022 
8023   EVT WideVT1 = LeftOp.getValueType();
8024   EVT WideVT2 = RightOp.getValueType();
8025   (void)WideVT2;
8026   // The two wide operand types must always match (asserted below).
8027   assert((WideVT1 == WideVT2) &&
8028          "Cannot have a multiply node with two different operand types.");
8029 
8030   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8031   // Check that the two extend nodes are the same type.
8032   if (NarrowVT != RightOp.getOperand(0).getValueType())
8033     return SDValue();
8034 
8035   // Only transform into mulh if mulh for the narrow type is cheaper than
8036   // a multiply followed by a shift. This should also check if mulh is
8037   // legal for NarrowVT on the target.
8038   if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
8039     return SDValue();
8040 
8041   // Proceed with the transformation if the wide type is twice as large
8042   // as the narrow type.
8043   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8044   if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8045     return SDValue();
8046 
8047   // Check the shift amount with the narrow type size.
8048   // Proceed with the transformation if the shift amount is the width
8049   // of the narrow type.
8050   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8051   if (ShiftAmt != NarrowVTSize)
8052     return SDValue();
8053 
8054   // If the operation feeding into the MUL is a sign extend (sext),
8055   // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8056   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8057 
8058   SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8059                                RightOp.getOperand(0));
8060   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8061                                      : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8062 }
8063 
8064 SDValue DAGCombiner::visitSRA(SDNode *N) {
8065   SDValue N0 = N->getOperand(0);
8066   SDValue N1 = N->getOperand(1);
8067   if (SDValue V = DAG.simplifyShift(N0, N1))
8068     return V;
8069 
8070   EVT VT = N0.getValueType();
8071   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8072 
8073   // Arithmetic shifting an all-sign-bit value is a no-op.
8074   // fold (sra 0, x) -> 0
8075   // fold (sra -1, x) -> -1
8076   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8077     return N0;
8078 
8079   // fold vector ops
8080   if (VT.isVector())
8081     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8082       return FoldedVOp;
8083 
8084   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8085 
8086   // fold (sra c1, c2) -> c1 >>s c2
8087   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8088     return C;
8089 
8090   if (SDValue NewSel = foldBinOpIntoSelect(N))
8091     return NewSel;
8092 
8093   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, provided the target
8094   // supports sext_inreg.
8095   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8096     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8097     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8098     if (VT.isVector())
8099       ExtVT = EVT::getVectorVT(*DAG.getContext(),
8100                                ExtVT, VT.getVectorNumElements());
8101     if (!LegalOperations ||
8102         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8103         TargetLowering::Legal)
8104       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8105                          N0.getOperand(0), DAG.getValueType(ExtVT));
8106   }
8107 
8108   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8109   // clamp (add c1, c2) to max shift.
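  // For example, with i32 x:
  //   sra (sra x, 10), 5  --> sra x, 15
  //   sra (sra x, 20), 20 --> sra x, 31 (the sum 40 is clamped to 31, which is
  //   safe because sra saturates to the sign bit).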
8110   if (N0.getOpcode() == ISD::SRA) {
8111     SDLoc DL(N);
8112     EVT ShiftVT = N1.getValueType();
8113     EVT ShiftSVT = ShiftVT.getScalarType();
8114     SmallVector<SDValue, 16> ShiftValues;
8115 
8116     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8117       APInt c1 = LHS->getAPIntValue();
8118       APInt c2 = RHS->getAPIntValue();
8119       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8120       APInt Sum = c1 + c2;
8121       unsigned ShiftSum =
8122           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8123       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8124       return true;
8125     };
8126     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8127       SDValue ShiftValue;
8128       if (VT.isVector())
8129         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8130       else
8131         ShiftValue = ShiftValues[0];
8132       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8133     }
8134   }
8135 
8136   // fold (sra (shl X, m), (sub result_size, n))
8137   // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
8138   // result_size - n != m.
8139   // If truncate is free for the target, this form is likely to result in
8140   // better code.
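  // For example (illustrative, i64 X, m = 8, shift amount 40, so n = 24):
  //   sra (shl X, 8), 40 --> sext (trunc (srl X, 32) to i24)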
8141   if (N0.getOpcode() == ISD::SHL && N1C) {
8142     // Get the two constants of the shifts: N01C = m, N1C = n.
8143     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8144     if (N01C) {
8145       LLVMContext &Ctx = *DAG.getContext();
8146       // Determine what the truncate's result bitsize and type would be.
8147       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8148 
8149       if (VT.isVector())
8150         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8151 
8152       // Determine the residual right-shift amount.
8153       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8154 
8155       // If the shift is not a no-op (in which case this should be just a sign
8156       // extend already), the truncated-to type is legal, sign_extend is legal
8157       // on that type, and the truncate to that type is both legal and free,
8158       // perform the transform.
8159       if ((ShiftAmt > 0) &&
8160           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8161           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8162           TLI.isTruncateFree(VT, TruncVT)) {
8163         SDLoc DL(N);
8164         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8165             getShiftAmountTy(N0.getOperand(0).getValueType()));
8166         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8167                                     N0.getOperand(0), Amt);
8168         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8169                                     Shift);
8170         return DAG.getNode(ISD::SIGN_EXTEND, DL,
8171                            N->getValueType(0), Trunc);
8172       }
8173     }
8174   }
8175 
8176   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8177   //   sra (add (shl X, N1C), AddC), N1C -->
8178   //   sext (add (trunc X to (width - N1C)), AddC')
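  // For example (illustrative, i32 X, N1C = 16, AddC = 0x50000):
  //   sra (add (shl X, 16), 0x50000), 16 --> sext (add (trunc X to i16), 5)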
8179   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8180       N0.getOperand(0).getOpcode() == ISD::SHL &&
8181       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8182     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8183       SDValue Shl = N0.getOperand(0);
8184       // Determine what the truncate's type would be and ask the target if that
8185       // is a free operation.
8186       LLVMContext &Ctx = *DAG.getContext();
8187       unsigned ShiftAmt = N1C->getZExtValue();
8188       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8189       if (VT.isVector())
8190         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8191 
8192       // TODO: The simple type check probably belongs in the default hook
8193       //       implementation and/or target-specific overrides (because
8194       //       non-simple types likely require masking when legalized), but that
8195       //       restriction may conflict with other transforms.
8196       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8197           TLI.isTruncateFree(VT, TruncVT)) {
8198         SDLoc DL(N);
8199         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8200         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8201                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8202         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8203         return DAG.getSExtOrTrunc(Add, DL, VT);
8204       }
8205     }
8206   }
8207 
8208   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8209   if (N1.getOpcode() == ISD::TRUNCATE &&
8210       N1.getOperand(0).getOpcode() == ISD::AND) {
8211     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8212       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8213   }
8214 
8215   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8216   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8217   //      if c1 is equal to the number of bits the trunc removes
8218   // TODO - support non-uniform vector shift amounts.
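  // For example, with i64 x truncated to i32 (so c1 = 32 bits are removed):
  //   sra (trunc (srl x, 32)), 5 --> trunc (sra x, 37)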
8219   if (N0.getOpcode() == ISD::TRUNCATE &&
8220       (N0.getOperand(0).getOpcode() == ISD::SRL ||
8221        N0.getOperand(0).getOpcode() == ISD::SRA) &&
8222       N0.getOperand(0).hasOneUse() &&
8223       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8224     SDValue N0Op0 = N0.getOperand(0);
8225     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8226       EVT LargeVT = N0Op0.getValueType();
8227       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8228       if (LargeShift->getAPIntValue() == TruncBits) {
8229         SDLoc DL(N);
8230         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8231                                       getShiftAmountTy(LargeVT));
8232         SDValue SRA =
8233             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8234         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8235       }
8236     }
8237   }
8238 
8239   // Simplify, based on bits shifted out of the LHS.
8240   if (SimplifyDemandedBits(SDValue(N, 0)))
8241     return SDValue(N, 0);
8242 
8243   // If the sign bit is known to be zero, switch this to a SRL.
8244   if (DAG.SignBitIsZero(N0))
8245     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8246 
8247   if (N1C && !N1C->isOpaque())
8248     if (SDValue NewSRA = visitShiftByConstant(N))
8249       return NewSRA;
8250 
8251   // Try to transform this shift into a multiply-high if
8252   // it matches the appropriate pattern detected in combineShiftToMULH.
8253   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8254     return MULH;
8255 
8256   return SDValue();
8257 }
8258 
8259 SDValue DAGCombiner::visitSRL(SDNode *N) {
8260   SDValue N0 = N->getOperand(0);
8261   SDValue N1 = N->getOperand(1);
8262   if (SDValue V = DAG.simplifyShift(N0, N1))
8263     return V;
8264 
8265   EVT VT = N0.getValueType();
8266   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8267 
8268   // fold vector ops
8269   if (VT.isVector())
8270     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8271       return FoldedVOp;
8272 
8273   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8274 
8275   // fold (srl c1, c2) -> c1 >>u c2
8276   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8277     return C;
8278 
8279   if (SDValue NewSel = foldBinOpIntoSelect(N))
8280     return NewSel;
8281 
8282   // if (srl x, c) is known to be zero, return 0
8283   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8284                                    APInt::getAllOnesValue(OpSizeInBits)))
8285     return DAG.getConstant(0, SDLoc(N), VT);
8286 
8287   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
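  // For example, with i32 x:
  //   srl (srl x, 10), 5  --> srl x, 15
  //   srl (srl x, 20), 20 --> 0 (all bits shifted out)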
8288   if (N0.getOpcode() == ISD::SRL) {
8289     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8290                                           ConstantSDNode *RHS) {
8291       APInt c1 = LHS->getAPIntValue();
8292       APInt c2 = RHS->getAPIntValue();
8293       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8294       return (c1 + c2).uge(OpSizeInBits);
8295     };
8296     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8297       return DAG.getConstant(0, SDLoc(N), VT);
8298 
8299     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8300                                        ConstantSDNode *RHS) {
8301       APInt c1 = LHS->getAPIntValue();
8302       APInt c2 = RHS->getAPIntValue();
8303       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8304       return (c1 + c2).ult(OpSizeInBits);
8305     };
8306     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8307       SDLoc DL(N);
8308       EVT ShiftVT = N1.getValueType();
8309       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8310       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8311     }
8312   }
8313 
8314   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8315       N0.getOperand(0).getOpcode() == ISD::SRL) {
8316     SDValue InnerShift = N0.getOperand(0);
8317     // TODO - support non-uniform vector shift amounts.
8318     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8319       uint64_t c1 = N001C->getZExtValue();
8320       uint64_t c2 = N1C->getZExtValue();
8321       EVT InnerShiftVT = InnerShift.getValueType();
8322       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8323       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8324       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8325       // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
8326       if (c1 + OpSizeInBits == InnerShiftSize) {
8327         SDLoc DL(N);
8328         if (c1 + c2 >= InnerShiftSize)
8329           return DAG.getConstant(0, DL, VT);
8330         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8331         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8332                                        InnerShift.getOperand(0), NewShiftAmt);
8333         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8334       }
8335       // In the more general case, we can clear the high bits after the shift:
8336       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8337       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8338           c1 + c2 < InnerShiftSize) {
8339         SDLoc DL(N);
8340         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8341         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8342                                        InnerShift.getOperand(0), NewShiftAmt);
8343         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8344                                                             OpSizeInBits - c2),
8345                                        DL, InnerShiftVT);
8346         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8347         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8348       }
8349     }
8350   }
8351 
8352   // fold (srl (shl x, c), c) -> (and x, cst2)
8353   // TODO - (srl (shl x, c1), c2).
8354   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8355       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8356     SDLoc DL(N);
8357     SDValue Mask =
8358         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8359     AddToWorklist(Mask.getNode());
8360     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8361   }
8362 
8363   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8364   // TODO - support non-uniform vector shift amounts.
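  // For example (illustrative, i16 x any-extended to i32, c = 4):
  //   srl (anyext x), 4 --> and (anyext (srl x, 4)), 0x0FFFFFFF
  // where the mask zeroes the top 4 bits to match the zeros the original srl
  // shifts in.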
8365   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8366     // Shifting in all undef bits?
8367     EVT SmallVT = N0.getOperand(0).getValueType();
8368     unsigned BitSize = SmallVT.getScalarSizeInBits();
8369     if (N1C->getAPIntValue().uge(BitSize))
8370       return DAG.getUNDEF(VT);
8371 
8372     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8373       uint64_t ShiftAmt = N1C->getZExtValue();
8374       SDLoc DL0(N0);
8375       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8376                                        N0.getOperand(0),
8377                           DAG.getConstant(ShiftAmt, DL0,
8378                                           getShiftAmountTy(SmallVT)));
8379       AddToWorklist(SmallShift.getNode());
8380       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8381       SDLoc DL(N);
8382       return DAG.getNode(ISD::AND, DL, VT,
8383                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8384                          DAG.getConstant(Mask, DL, VT));
8385     }
8386   }
8387 
8388   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
8389   // bit, which is unmodified by sra.
8390   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8391     if (N0.getOpcode() == ISD::SRA)
8392       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8393   }
8394 
8395   // fold (srl (ctlz x), "5") -> (xor x, 1) iff only the low bit of x can be set.
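  // For example, with i32 x where only bit 3 can be nonzero: ctlz returns 28
  // if the bit is set and 32 if it is clear, so (srl (ctlz x), 5) is 0 or 1
  // respectively, and becomes (xor (srl x, 3), 1) below.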
8396   if (N1C && N0.getOpcode() == ISD::CTLZ &&
8397       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8398     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8399 
8400     // If any of the input bits are KnownOne, then the input couldn't be all
8401     // zeros, thus the result of the srl will always be zero.
8402     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8403 
8404     // If all of the bits input to the ctlz node are known to be zero, then
8405     // the result of the ctlz is "32" and the result of the shift is one.
8406     APInt UnknownBits = ~Known.Zero;
8407     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8408 
8409     // Otherwise, check to see if there is exactly one bit input to the ctlz.
8410     if (UnknownBits.isPowerOf2()) {
8411       // Okay, we know that only the single bit specified by UnknownBits
8412       // could be set on input to the CTLZ node. If this bit is set, the SRL
8413       // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
8414       // to an SRL/XOR pair, which is likely to simplify more.
8415       unsigned ShAmt = UnknownBits.countTrailingZeros();
8416       SDValue Op = N0.getOperand(0);
8417 
8418       if (ShAmt) {
8419         SDLoc DL(N0);
8420         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8421                   DAG.getConstant(ShAmt, DL,
8422                                   getShiftAmountTy(Op.getValueType())));
8423         AddToWorklist(Op.getNode());
8424       }
8425 
8426       SDLoc DL(N);
8427       return DAG.getNode(ISD::XOR, DL, VT,
8428                          Op, DAG.getConstant(1, DL, VT));
8429     }
8430   }
8431 
8432   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8433   if (N1.getOpcode() == ISD::TRUNCATE &&
8434       N1.getOperand(0).getOpcode() == ISD::AND) {
8435     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8436       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8437   }
8438 
8439   // fold operands of srl based on knowledge that the low bits are not
8440   // demanded.
8441   if (SimplifyDemandedBits(SDValue(N, 0)))
8442     return SDValue(N, 0);
8443 
8444   if (N1C && !N1C->isOpaque())
8445     if (SDValue NewSRL = visitShiftByConstant(N))
8446       return NewSRL;
8447 
8448   // Attempt to convert a srl of a load into a narrower zero-extending load.
8449   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8450     return NarrowLoad;
8451 
8452   // Here is a common situation. We want to optimize:
8453   //
8454   //   %a = ...
8455   //   %b = and i32 %a, 2
8456   //   %c = srl i32 %b, 1
8457   //   brcond i32 %c ...
8458   //
8459   // into
8460   //
8461   //   %a = ...
8462   //   %b = and %a, 2
8463   //   %c = setcc eq %b, 0
8464   //   brcond %c ...
8465   //
8466   // However, after the source operand of the SRL is optimized into an AND, the
8467   // SRL itself may not be optimized further. Look for it and add the BRCOND to
8468   // the worklist.
8469   if (N->hasOneUse()) {
8470     SDNode *Use = *N->use_begin();
8471     if (Use->getOpcode() == ISD::BRCOND)
8472       AddToWorklist(Use);
8473     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8474       // Also look past the truncate.
8475       Use = *Use->use_begin();
8476       if (Use->getOpcode() == ISD::BRCOND)
8477         AddToWorklist(Use);
8478     }
8479   }
8480 
8481   // Try to transform this shift into a multiply-high if
8482   // it matches the appropriate pattern detected in combineShiftToMULH.
8483   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8484     return MULH;
8485 
8486   return SDValue();
8487 }
8488 
8489 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8490   EVT VT = N->getValueType(0);
8491   SDValue N0 = N->getOperand(0);
8492   SDValue N1 = N->getOperand(1);
8493   SDValue N2 = N->getOperand(2);
8494   bool IsFSHL = N->getOpcode() == ISD::FSHL;
8495   unsigned BitWidth = VT.getScalarSizeInBits();
8496 
8497   // fold (fshl N0, N1, 0) -> N0
8498   // fold (fshr N0, N1, 0) -> N1
8499   if (isPowerOf2_32(BitWidth))
8500     if (DAG.MaskedValueIsZero(
8501             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8502       return IsFSHL ? N0 : N1;
8503 
8504   auto IsUndefOrZero = [](SDValue V) {
8505     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8506   };
8507 
8508   // TODO - support non-uniform vector shift amounts.
8509   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8510     EVT ShAmtTy = N2.getValueType();
8511 
8512     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
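    // For example, with i8 operands: fshl a, b, 11 --> fshl a, b, 3.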
8513     if (Cst->getAPIntValue().uge(BitWidth)) {
8514       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8515       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8516                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
8517     }
8518 
8519     unsigned ShAmt = Cst->getZExtValue();
8520     if (ShAmt == 0)
8521       return IsFSHL ? N0 : N1;
8522 
8523     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8524     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8525     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8526     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
8527     if (IsUndefOrZero(N0))
8528       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8529                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8530                                          SDLoc(N), ShAmtTy));
8531     if (IsUndefOrZero(N1))
8532       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8533                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8534                                          SDLoc(N), ShAmtTy));
8535 
8536     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8537     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8538     // TODO - bigendian support once we have test coverage.
8539     // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
8540     // TODO - permit LHS EXTLOAD if extensions are shifted out.
8541     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
8542         !DAG.getDataLayout().isBigEndian()) {
8543       auto *LHS = dyn_cast<LoadSDNode>(N0);
8544       auto *RHS = dyn_cast<LoadSDNode>(N1);
8545       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
8546           LHS->getAddressSpace() == RHS->getAddressSpace() &&
8547           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
8548           ISD::isNON_EXTLoad(LHS)) {
8549         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
8550           SDLoc DL(RHS);
8551           uint64_t PtrOff =
8552               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
8553           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
8554           bool Fast = false;
8555           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8556                                      RHS->getAddressSpace(), NewAlign,
8557                                      RHS->getMemOperand()->getFlags(), &Fast) &&
8558               Fast) {
8559             SDValue NewPtr =
8560                 DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
8561             AddToWorklist(NewPtr.getNode());
8562             SDValue Load = DAG.getLoad(
8563                 VT, DL, RHS->getChain(), NewPtr,
8564                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8565                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
8566             // Replace the old load's chain with the new load's chain.
8567             WorklistRemover DeadNodes(*this);
8568             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
8569             return Load;
8570           }
8571         }
8572       }
8573     }
8574   }
8575 
8576   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
8577   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
8578   // iff we know the shift amount is in range.
8579   // TODO: when is it worth doing SUB(BW, N2) as well?
8580   if (isPowerOf2_32(BitWidth)) {
8581     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
8582     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8583       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
8584     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8585       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
8586   }
8587 
8588   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
8589   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
8590   // TODO: Investigate flipping this rotate if only one is legal. If funnel shift
8591   // is legal as well, we might be better off avoiding non-constant (BW - N2).
8592   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8593   if (N0 == N1 && hasOperation(RotOpc, VT))
8594     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8595 
8596   // Simplify, based on bits shifted out of N0/N1.
8597   if (SimplifyDemandedBits(SDValue(N, 0)))
8598     return SDValue(N, 0);
8599 
8600   return SDValue();
8601 }
8602 
8603 SDValue DAGCombiner::visitABS(SDNode *N) {
8604   SDValue N0 = N->getOperand(0);
8605   EVT VT = N->getValueType(0);
8606 
8607   // fold (abs c1) -> c2
8608   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8609     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8610   // fold (abs (abs x)) -> (abs x)
8611   if (N0.getOpcode() == ISD::ABS)
8612     return N0;
8613   // fold (abs x) -> x iff not-negative
8614   if (DAG.SignBitIsZero(N0))
8615     return N0;
8616   return SDValue();
8617 }
8618 
8619 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8620   SDValue N0 = N->getOperand(0);
8621   EVT VT = N->getValueType(0);
8622 
8623   // fold (bswap c1) -> c2
8624   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8625     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8626   // fold (bswap (bswap x)) -> x
8627   if (N0.getOpcode() == ISD::BSWAP)
8628     return N0->getOperand(0);
8629   return SDValue();
8630 }
8631 
8632 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8633   SDValue N0 = N->getOperand(0);
8634   EVT VT = N->getValueType(0);
8635 
8636   // fold (bitreverse c1) -> c2
8637   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8638     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8639   // fold (bitreverse (bitreverse x)) -> x
8640   if (N0.getOpcode() == ISD::BITREVERSE)
8641     return N0.getOperand(0);
8642   return SDValue();
8643 }
8644 
8645 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8646   SDValue N0 = N->getOperand(0);
8647   EVT VT = N->getValueType(0);
8648 
8649   // fold (ctlz c1) -> c2
8650   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8651     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8652 
8653   // If the value is known never to be zero, switch to the undef version.
8654   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8655     if (DAG.isKnownNeverZero(N0))
8656       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8657   }
8658 
8659   return SDValue();
8660 }
8661 
8662 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8663   SDValue N0 = N->getOperand(0);
8664   EVT VT = N->getValueType(0);
8665 
8666   // fold (ctlz_zero_undef c1) -> c2
8667   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8668     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8669   return SDValue();
8670 }
8671 
8672 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8673   SDValue N0 = N->getOperand(0);
8674   EVT VT = N->getValueType(0);
8675 
8676   // fold (cttz c1) -> c2
8677   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8678     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8679 
8680   // If the value is known never to be zero, switch to the undef version.
8681   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8682     if (DAG.isKnownNeverZero(N0))
8683       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8684   }
8685 
8686   return SDValue();
8687 }
8688 
8689 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8690   SDValue N0 = N->getOperand(0);
8691   EVT VT = N->getValueType(0);
8692 
8693   // fold (cttz_zero_undef c1) -> c2
8694   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8695     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8696   return SDValue();
8697 }
8698 
8699 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8700   SDValue N0 = N->getOperand(0);
8701   EVT VT = N->getValueType(0);
8702 
8703   // fold (ctpop c1) -> c2
8704   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8705     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8706   return SDValue();
8707 }
8708 
8709 // FIXME: This should be checking for no signed zeros on individual operands, as
8710 // well as no nans.
8711 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8712                                          SDValue RHS,
8713                                          const TargetLowering &TLI) {
8714   const TargetOptions &Options = DAG.getTarget().Options;
8715   EVT VT = LHS.getValueType();
8716 
8717   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8718          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8719          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8720 }
8721 
8722 /// Generate Min/Max node
8723 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8724                                    SDValue RHS, SDValue True, SDValue False,
8725                                    ISD::CondCode CC, const TargetLowering &TLI,
8726                                    SelectionDAG &DAG) {
8727   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8728     return SDValue();
8729 
8730   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8731   switch (CC) {
8732   case ISD::SETOLT:
8733   case ISD::SETOLE:
8734   case ISD::SETLT:
8735   case ISD::SETLE:
8736   case ISD::SETULT:
8737   case ISD::SETULE: {
8738     // Since the operands are already known to be never-NaN to get here, either
8739     // fminnum or fminnum_ieee is OK. Try the ieee version first, since fminnum
8740     // is expanded in terms of it.
8741     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8742     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8743       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8744 
8745     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8746     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8747       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8748     return SDValue();
8749   }
8750   case ISD::SETOGT:
8751   case ISD::SETOGE:
8752   case ISD::SETGT:
8753   case ISD::SETGE:
8754   case ISD::SETUGT:
8755   case ISD::SETUGE: {
8756     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8757     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8758       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8759 
8760     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8761     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8762       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8763     return SDValue();
8764   }
8765   default:
8766     return SDValue();
8767   }
8768 }
8769 
8770 /// If a (v)select has a condition value that is a sign-bit test, try to smear
8771 /// the condition operand sign-bit across the value width and use it as a mask.
8772 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
8773   SDValue Cond = N->getOperand(0);
8774   SDValue C1 = N->getOperand(1);
8775   SDValue C2 = N->getOperand(2);
8776   assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
8777          "Expected select-of-constants");
8778 
8779   EVT VT = N->getValueType(0);
8780   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
8781       VT != Cond.getOperand(0).getValueType())
8782     return SDValue();
8783 
8784   // The inverted-condition + commuted-select variants of these patterns are
8785   // canonicalized to these forms in IR.
8786   SDValue X = Cond.getOperand(0);
8787   SDValue CondC = Cond.getOperand(1);
8788   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
8789   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
8790       isAllOnesOrAllOnesSplat(C2)) {
8791     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
8792     SDLoc DL(N);
8793     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8794     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8795     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
8796   }
8797   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
8798     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
8799     SDLoc DL(N);
8800     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8801     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8802     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
8803   }
8804   return SDValue();
8805 }
8806 
8807 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8808   SDValue Cond = N->getOperand(0);
8809   SDValue N1 = N->getOperand(1);
8810   SDValue N2 = N->getOperand(2);
8811   EVT VT = N->getValueType(0);
8812   EVT CondVT = Cond.getValueType();
8813   SDLoc DL(N);
8814 
8815   if (!VT.isInteger())
8816     return SDValue();
8817 
8818   auto *C1 = dyn_cast<ConstantSDNode>(N1);
8819   auto *C2 = dyn_cast<ConstantSDNode>(N2);
8820   if (!C1 || !C2)
8821     return SDValue();
8822 
8823   // Only do this before legalization to avoid conflicting with target-specific
8824   // transforms in the other direction (create a select from a zext/sext). There
8825   // is also a target-independent combine here in DAGCombiner in the other
8826   // direction for (select Cond, -1, 0) when the condition is not i1.
8827   if (CondVT == MVT::i1 && !LegalOperations) {
8828     if (C1->isNullValue() && C2->isOne()) {
8829       // select Cond, 0, 1 --> zext (!Cond)
8830       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8831       if (VT != MVT::i1)
8832         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8833       return NotCond;
8834     }
8835     if (C1->isNullValue() && C2->isAllOnesValue()) {
8836       // select Cond, 0, -1 --> sext (!Cond)
8837       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8838       if (VT != MVT::i1)
8839         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8840       return NotCond;
8841     }
8842     if (C1->isOne() && C2->isNullValue()) {
8843       // select Cond, 1, 0 --> zext (Cond)
8844       if (VT != MVT::i1)
8845         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8846       return Cond;
8847     }
8848     if (C1->isAllOnesValue() && C2->isNullValue()) {
8849       // select Cond, -1, 0 --> sext (Cond)
8850       if (VT != MVT::i1)
8851         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8852       return Cond;
8853     }
8854 
8855     // Use a target hook because some targets may prefer to transform in the
8856     // other direction.
8857     if (TLI.convertSelectOfConstantsToMath(VT)) {
8858       // For any constants that differ by 1, we can transform the select into an
8859       // extend and add.
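      // For example: select Cond, 5, 4 --> add (zext Cond), 4.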
8860       const APInt &C1Val = C1->getAPIntValue();
8861       const APInt &C2Val = C2->getAPIntValue();
8862       if (C1Val - 1 == C2Val) {
8863         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8864         if (VT != MVT::i1)
8865           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8866         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8867       }
8868       if (C1Val + 1 == C2Val) {
8869         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8870         if (VT != MVT::i1)
8871           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8872         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8873       }
8874 
8875       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
8876       if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
8877         if (VT != MVT::i1)
8878           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8879         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
8880         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
8881       }
8882 
8883       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
8884         return V;
8885     }
8886 
8887     return SDValue();
8888   }
8889 
8890   // fold (select Cond, 0, 1) -> (xor Cond, 1)
8891   // We can't do this reliably if integer based booleans have different contents
8892   // to floating point based booleans. This is because we can't tell whether we
8893   // have an integer-based boolean or a floating-point-based boolean unless we
8894   // can find the SETCC that produced it and inspect its operands. This is
8895   // fairly easy if C is the SETCC node, but it can potentially be
8896   // undiscoverable (or not reasonably discoverable). For example, it could be
8897   // in another basic block or it could require searching a complicated
8898   // expression.
8899   if (CondVT.isInteger() &&
8900       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8901           TargetLowering::ZeroOrOneBooleanContent &&
8902       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8903           TargetLowering::ZeroOrOneBooleanContent &&
8904       C1->isNullValue() && C2->isOne()) {
8905     SDValue NotCond =
8906         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8907     if (VT.bitsEq(CondVT))
8908       return NotCond;
8909     return DAG.getZExtOrTrunc(NotCond, DL, VT);
8910   }
8911 
8912   return SDValue();
8913 }
8914 
8915 SDValue DAGCombiner::visitSELECT(SDNode *N) {
8916   SDValue N0 = N->getOperand(0);
8917   SDValue N1 = N->getOperand(1);
8918   SDValue N2 = N->getOperand(2);
8919   EVT VT = N->getValueType(0);
8920   EVT VT0 = N0.getValueType();
8921   SDLoc DL(N);
8922   SDNodeFlags Flags = N->getFlags();
8923 
8924   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8925     return V;
8926 
8927   // fold (select X, X, Y) -> (or X, Y)
8928   // fold (select X, 1, Y) -> (or C, Y)
8929   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8930     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8931 
8932   if (SDValue V = foldSelectOfConstants(N))
8933     return V;
8934 
8935   // fold (select C, 0, X) -> (and (not C), X)
8936   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8937     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8938     AddToWorklist(NOTNode.getNode());
8939     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8940   }
8941   // fold (select C, X, 1) -> (or (not C), X)
8942   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8943     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8944     AddToWorklist(NOTNode.getNode());
8945     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8946   }
8947   // fold (select X, Y, X) -> (and X, Y)
8948   // fold (select X, Y, 0) -> (and X, Y)
8949   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8950     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8951 
8952   // If we can fold this based on the true/false value, do so.
8953   if (SimplifySelectOps(N, N1, N2))
8954     return SDValue(N, 0); // Don't revisit N.
8955 
8956   if (VT0 == MVT::i1) {
8957     // The code in this block deals with the following 2 equivalences:
8958     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8959     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8960     // The target can specify its preferred form with the
8961     // shouldNormalizeToSelectSequence() callback. However, we always transform
8962     // to the right if we find the inner select already exists in the DAG,
8963     // and we always transform to the left side if we know that we can further
8964     // optimize the combination of the conditions.
8965     bool normalizeToSequence =
8966         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8967     // select (and Cond0, Cond1), X, Y
8968     //   -> select Cond0, (select Cond1, X, Y), Y
8969     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8970       SDValue Cond0 = N0->getOperand(0);
8971       SDValue Cond1 = N0->getOperand(1);
8972       SDValue InnerSelect =
8973           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8974       if (normalizeToSequence || !InnerSelect.use_empty())
8975         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8976                            InnerSelect, N2, Flags);
8977       // Cleanup on failure.
8978       if (InnerSelect.use_empty())
8979         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8980     }
8981     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8982     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8983       SDValue Cond0 = N0->getOperand(0);
8984       SDValue Cond1 = N0->getOperand(1);
8985       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8986                                         Cond1, N1, N2, Flags);
8987       if (normalizeToSequence || !InnerSelect.use_empty())
8988         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8989                            InnerSelect, Flags);
8990       // Cleanup on failure.
8991       if (InnerSelect.use_empty())
8992         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8993     }
8994 
8995     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8996     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8997       SDValue N1_0 = N1->getOperand(0);
8998       SDValue N1_1 = N1->getOperand(1);
8999       SDValue N1_2 = N1->getOperand(2);
9000       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9001         // Create the actual and node if we can generate good code for it.
9002         if (!normalizeToSequence) {
9003           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9004           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9005                              N2, Flags);
9006         }
9007         // Otherwise see if we can optimize the "and" to a better pattern.
9008         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9009           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9010                              N2, Flags);
9011         }
9012       }
9013     }
9014     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9015     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9016       SDValue N2_0 = N2->getOperand(0);
9017       SDValue N2_1 = N2->getOperand(1);
9018       SDValue N2_2 = N2->getOperand(2);
9019       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9020         // Create the actual or node if we can generate good code for it.
9021         if (!normalizeToSequence) {
9022           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9023           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9024                              N2_2, Flags);
9025         }
9026         // Otherwise see if we can optimize to a better pattern.
9027         if (SDValue Combined = visitORLike(N0, N2_0, N))
9028           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9029                              N2_2, Flags);
9030       }
9031     }
9032   }
9033 
9034   // select (not Cond), N1, N2 -> select Cond, N2, N1
9035   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9036     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9037     SelectOp->setFlags(Flags);
9038     return SelectOp;
9039   }
9040 
9041   // Fold selects based on a setcc into other things, such as min/max/abs.
9042   if (N0.getOpcode() == ISD::SETCC) {
9043     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9044     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9045 
9046     // select (fcmp lt x, y), x, y -> fminnum x, y
9047     // select (fcmp gt x, y), x, y -> fmaxnum x, y
9048     //
9049     // This is OK if we don't care what happens if either operand is a NaN.
9050     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9051       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9052                                                 CC, TLI, DAG))
9053         return FMinMax;
9054 
9055     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9056     // This is conservatively limited to pre-legal-operations to give targets
9057     // a chance to reverse the transform if they want to do that. Also, it is
9058     // unlikely that the pattern would be formed late, so it's probably not
9059     // worth going through the other checks.
9060     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9061         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9062         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9063       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9064       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9065       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9066         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9067         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9068         //
9069         // The IR equivalent of this transform would have this form:
9070         //   %a = add %x, C
9071         //   %c = icmp ugt %x, ~C
9072         //   %r = select %c, -1, %a
9073         //   =>
9074         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9075         //   %u0 = extractvalue %u, 0
9076         //   %u1 = extractvalue %u, 1
9077         //   %r = select %u1, -1, %u0
9078         SDVTList VTs = DAG.getVTList(VT, VT0);
9079         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9080         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9081       }
9082     }
9083 
9084     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9085         (!LegalOperations &&
9086          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9087       // Any flags available in a select/setcc fold will be on the setcc as they
9088       // migrated from fcmp.
9089       Flags = N0.getNode()->getFlags();
9090       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9091                                        N2, N0.getOperand(2));
9092       SelectNode->setFlags(Flags);
9093       return SelectNode;
9094     }
9095 
9096     return SimplifySelect(DL, N0, N1, N2);
9097   }
9098 
9099   return SDValue();
9100 }
9101 
9102 // This function assumes all the vselect's arguments are CONCAT_VECTOR
9103 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9104 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9105   SDLoc DL(N);
9106   SDValue Cond = N->getOperand(0);
9107   SDValue LHS = N->getOperand(1);
9108   SDValue RHS = N->getOperand(2);
9109   EVT VT = N->getValueType(0);
9110   int NumElems = VT.getVectorNumElements();
9111   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9112          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9113          Cond.getOpcode() == ISD::BUILD_VECTOR);
9114 
9115   // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
9116   // binary ones here.
9117   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9118     return SDValue();
9119 
9120   // We're sure we have an even number of elements due to the
9121   // concat_vectors we have as arguments to vselect.
9122   // Skip BV elements until we find one that's not an UNDEF. After we find a
9123   // non-UNDEF element, keep looping until we get to half the length of the BV
9124   // and check that all the non-undef nodes are the same.
9125   ConstantSDNode *BottomHalf = nullptr;
9126   for (int i = 0; i < NumElems / 2; ++i) {
9127     if (Cond->getOperand(i)->isUndef())
9128       continue;
9129 
9130     if (BottomHalf == nullptr)
9131       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9132     else if (Cond->getOperand(i).getNode() != BottomHalf)
9133       return SDValue();
9134   }
9135 
9136   // Do the same for the second half of the BuildVector
9137   ConstantSDNode *TopHalf = nullptr;
9138   for (int i = NumElems / 2; i < NumElems; ++i) {
9139     if (Cond->getOperand(i)->isUndef())
9140       continue;
9141 
9142     if (TopHalf == nullptr)
9143       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9144     else if (Cond->getOperand(i).getNode() != TopHalf)
9145       return SDValue();
9146   }
9147 
9148   assert(TopHalf && BottomHalf &&
9149          "One half of the selector was all UNDEFs and the other was all the "
9150          "same value. This should have been addressed before this function.");
9151   return DAG.getNode(
9152       ISD::CONCAT_VECTORS, DL, VT,
9153       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9154       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9155 }
9156 
9157 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9158   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9159   SDValue Mask = MSC->getMask();
9160   SDValue Chain = MSC->getChain();
9161   SDLoc DL(N);
9162 
9163   // Zap scatters with a zero mask.
9164   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9165     return Chain;
9166 
9167   return SDValue();
9168 }
9169 
9170 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9171   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9172   SDValue Mask = MST->getMask();
9173   SDValue Chain = MST->getChain();
9174   SDLoc DL(N);
9175 
9176   // Zap masked stores with a zero mask.
9177   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9178     return Chain;
9179 
9180   // Try transforming N to an indexed store.
9181   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9182     return SDValue(N, 0);
9183 
9184   return SDValue();
9185 }
9186 
9187 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9188   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9189   SDValue Mask = MGT->getMask();
9190   SDLoc DL(N);
9191 
9192   // Zap gathers with a zero mask.
9193   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9194     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
9195 
9196   return SDValue();
9197 }
9198 
9199 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9200   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9201   SDValue Mask = MLD->getMask();
9202   SDLoc DL(N);
9203 
9204   // Zap masked loads with a zero mask.
9205   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9206     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9207 
9208   // Try transforming N to an indexed load.
9209   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9210     return SDValue(N, 0);
9211 
9212   return SDValue();
9213 }
9214 
9215 /// A vector select of 2 constant vectors can be simplified to math/logic to
9216 /// avoid a variable select instruction and possibly avoid constant loads.
9217 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9218   SDValue Cond = N->getOperand(0);
9219   SDValue N1 = N->getOperand(1);
9220   SDValue N2 = N->getOperand(2);
9221   EVT VT = N->getValueType(0);
9222   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9223       !TLI.convertSelectOfConstantsToMath(VT) ||
9224       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9225       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9226     return SDValue();
9227 
9228   // Check if we can use the condition value to increment/decrement a single
9229   // constant value. This simplifies a select to an add and removes a constant
9230   // load/materialization from the general case.
9231   bool AllAddOne = true;
9232   bool AllSubOne = true;
9233   unsigned Elts = VT.getVectorNumElements();
9234   for (unsigned i = 0; i != Elts; ++i) {
9235     SDValue N1Elt = N1.getOperand(i);
9236     SDValue N2Elt = N2.getOperand(i);
9237     if (N1Elt.isUndef() || N2Elt.isUndef())
9238       continue;
9239     if (N1Elt.getValueType() != N2Elt.getValueType())
9240       continue;
9241 
9242     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9243     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9244     if (C1 != C2 + 1)
9245       AllAddOne = false;
9246     if (C1 != C2 - 1)
9247       AllSubOne = false;
9248   }
9249 
9250   // Further simplifications for the extra-special cases where the constants are
9251   // all 0 or all -1 should be implemented as folds of these patterns.
9252   SDLoc DL(N);
9253   if (AllAddOne || AllSubOne) {
9254     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9255     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
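    // E.g. (illustrative): vselect <2 x i1> Cond, <4,4>, <3,3>
    //   --> add (zext Cond), <3,3>
    // because (zext true) == 1 and (zext false) == 0.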
9256     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9257     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9258     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9259   }
9260 
9261   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
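  // E.g. (illustrative): vselect <2 x i1> Cond, <8,8>, <0,0>
  //   --> shl (zext Cond), <3,3>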
9262   APInt Pow2C;
9263   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9264       isNullOrNullSplat(N2)) {
9265     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9266     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9267     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9268   }
9269 
9270   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9271     return V;
9272 
9273   // The general case for select-of-constants:
9274   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9275   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9276   // leave that to a machine-specific pass.
9277   return SDValue();
9278 }
9279 
9280 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9281   SDValue N0 = N->getOperand(0);
9282   SDValue N1 = N->getOperand(1);
9283   SDValue N2 = N->getOperand(2);
9284   EVT VT = N->getValueType(0);
9285   SDLoc DL(N);
9286 
9287   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9288     return V;
9289 
9290   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9291   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9292     return DAG.getSelect(DL, VT, F, N2, N1);
9293 
9294   // Canonicalize integer abs.
9295   // vselect (setg[te] X,  0),  X, -X ->
9296   // vselect (setgt    X, -1),  X, -X ->
9297   // vselect (setl[te] X,  0), -X,  X ->
9298   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
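  // As an illustrative check of that expansion, for i32 X = -5:
  //   Y = sra(-5, 31) = -1; add(-5, -1) = -6; xor(-6, -1) = 5 = |X|.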
9299   if (N0.getOpcode() == ISD::SETCC) {
9300     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9301     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9302     bool isAbs = false;
9303     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9304 
9305     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9306          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9307         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9308       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9309     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9310              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9311       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9312 
9313     if (isAbs) {
9314       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9315         return DAG.getNode(ISD::ABS, DL, VT, LHS);
9316 
9317       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9318                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
9319                                                   DL, getShiftAmountTy(VT)));
9320       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9321       AddToWorklist(Shift.getNode());
9322       AddToWorklist(Add.getNode());
9323       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9324     }
9325 
9326     // vselect x, y (fcmp lt x, y) -> fminnum x, y
9327     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9328     //
9329     // This is OK if we don't care about what happens if either operand is a
9330     // NaN.
9331     //
9332     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9333       if (SDValue FMinMax =
9334               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9335         return FMinMax;
9336     }
9337 
9338     // If this select has a condition (setcc) with narrower operands than the
9339     // select, try to widen the compare to match the select width.
9340     // TODO: This should be extended to handle any constant.
9341     // TODO: This could be extended to handle non-loading patterns, but that
9342     //       requires thorough testing to avoid regressions.
9343     if (isNullOrNullSplat(RHS)) {
9344       EVT NarrowVT = LHS.getValueType();
9345       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9346       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
9347       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
9348       unsigned WideWidth = WideVT.getScalarSizeInBits();
9349       bool IsSigned = isSignedIntSetCC(CC);
9350       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9351       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
9352           SetCCWidth != 1 && SetCCWidth < WideWidth &&
9353           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
9354           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
9355         // Both compare operands can be widened for free. The LHS can use an
9356         // extended load, and the RHS is a constant:
9357         //   vselect (ext (setcc load(X), C)), N1, N2 -->
9358         //   vselect (setcc extload(X), C'), N1, N2
9359         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9360         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
9361         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
9362         EVT WideSetCCVT = getSetCCResultType(WideVT);
9363         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
9364         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
9365       }
9366     }
9367   }
9368 
9369   if (SimplifySelectOps(N, N1, N2))
9370     return SDValue(N, 0);  // Don't revisit N.
9371 
9372   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
9373   if (ISD::isBuildVectorAllOnes(N0.getNode()))
9374     return N1;
9375   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
9376   if (ISD::isBuildVectorAllZeros(N0.getNode()))
9377     return N2;
9378 
9379   // ConvertSelectToConcatVector assumes that both of the above checks for
9380   // (vselect (build_vector all_{ones,zeros}) ...) have already been made
9381   // and addressed.
9382   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9383       N2.getOpcode() == ISD::CONCAT_VECTORS &&
9384       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
9385     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
9386       return CV;
9387   }
9388 
9389   if (SDValue V = foldVSelectOfConstants(N))
9390     return V;
9391 
9392   return SDValue();
9393 }
9394 
9395 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
9396   SDValue N0 = N->getOperand(0);
9397   SDValue N1 = N->getOperand(1);
9398   SDValue N2 = N->getOperand(2);
9399   SDValue N3 = N->getOperand(3);
9400   SDValue N4 = N->getOperand(4);
9401   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
9402 
9403   // fold select_cc lhs, rhs, x, x, cc -> x
9404   if (N2 == N3)
9405     return N2;
9406 
9407   // Determine if the condition we're dealing with is constant
9408   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
9409                                   CC, SDLoc(N), false)) {
9410     AddToWorklist(SCC.getNode());
9411 
9412     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
9413       if (!SCCC->isNullValue())
9414         return N2;    // cond always true -> true val
9415       else
9416         return N3;    // cond always false -> false val
9417     } else if (SCC->isUndef()) {
9418       // When the condition is UNDEF, just return the first operand. This is
9419       // consistent with DAG creation: no setcc node is created in this case.
9420       return N2;
9421     } else if (SCC.getOpcode() == ISD::SETCC) {
9422       // Fold to a simpler select_cc
9423       SDValue SelectOp = DAG.getNode(
9424           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
9425           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
9426       SelectOp->setFlags(SCC->getFlags());
9427       return SelectOp;
9428     }
9429   }
9430 
9431   // If we can fold this based on the true/false value, do so.
9432   if (SimplifySelectOps(N, N2, N3))
9433     return SDValue(N, 0);  // Don't revisit N.
9434 
9435   // fold select_cc into other things, such as min/max/abs
9436   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
9437 }
9438 
9439 SDValue DAGCombiner::visitSETCC(SDNode *N) {
9440   // setcc is very commonly used as an argument to brcond. This pattern
9441   // also lends itself to numerous combines and, as a result, it is desirable
9442   // to keep the argument to a brcond as a setcc as much as possible.
9443   bool PreferSetCC =
9444       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
9445 
9446   SDValue Combined = SimplifySetCC(
9447       N->getValueType(0), N->getOperand(0), N->getOperand(1),
9448       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
9449 
9450   if (!Combined)
9451     return SDValue();
9452 
9453   // If we prefer to have a setcc but the combined node is not one, we'll
9454   // try our best to recreate one using rebuildSetCC.
9455   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
9456     SDValue NewSetCC = rebuildSetCC(Combined);
9457 
9458     // We don't have anything interesting to combine to.
9459     if (NewSetCC.getNode() == N)
9460       return SDValue();
9461 
9462     if (NewSetCC)
9463       return NewSetCC;
9464   }
9465 
9466   return Combined;
9467 }
9468 
9469 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9470   SDValue LHS = N->getOperand(0);
9471   SDValue RHS = N->getOperand(1);
9472   SDValue Carry = N->getOperand(2);
9473   SDValue Cond = N->getOperand(3);
9474 
9475   // If Carry is false, fold to a regular SETCC.
9476   if (isNullConstant(Carry))
9477     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9478 
9479   return SDValue();
9480 }
9481 
9482 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9483 /// a build_vector of constants.
9484 /// This function is called by the DAGCombiner when visiting sext/zext/aext
9485 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9486 /// Vector extends are not folded if operations are legal; this is to
9487 /// avoid introducing illegal build_vector dag nodes.
9488 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9489                                          SelectionDAG &DAG, bool LegalTypes) {
9490   unsigned Opcode = N->getOpcode();
9491   SDValue N0 = N->getOperand(0);
9492   EVT VT = N->getValueType(0);
9493   SDLoc DL(N);
9494 
9495   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9496          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9497          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
9498          && "Expected EXTEND dag node in input!");
9499 
9500   // fold (sext c1) -> c1
9501   // fold (zext c1) -> c1
9502   // fold (aext c1) -> c1
9503   if (isa<ConstantSDNode>(N0))
9504     return DAG.getNode(Opcode, DL, VT, N0);
9505 
9506   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9507   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9508   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9509   if (N0->getOpcode() == ISD::SELECT) {
9510     SDValue Op1 = N0->getOperand(1);
9511     SDValue Op2 = N0->getOperand(2);
9512     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9513         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
9514       // For any_extend, choose sign extension of the constants to allow a
9515       // possible further transform to sign_extend_inreg, i.e.:
9516       //
9517       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9518       // t2: i64 = any_extend t1
9519       // -->
9520       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9521       // -->
9522       // t4: i64 = sign_extend_inreg t3
9523       unsigned FoldOpc = Opcode;
9524       if (FoldOpc == ISD::ANY_EXTEND)
9525         FoldOpc = ISD::SIGN_EXTEND;
9526       return DAG.getSelect(DL, VT, N0->getOperand(0),
9527                            DAG.getNode(FoldOpc, DL, VT, Op1),
9528                            DAG.getNode(FoldOpc, DL, VT, Op2));
9529     }
9530   }
9531 
9532   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
9533   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
9534   // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
9535   EVT SVT = VT.getScalarType();
9536   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
9537       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
9538     return SDValue();
9539 
9540   // We can fold this node into a build_vector.
9541   unsigned VTBits = SVT.getSizeInBits();
9542   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
9543   SmallVector<SDValue, 8> Elts;
9544   unsigned NumElts = VT.getVectorNumElements();
9545 
9546   // For zero-extensions, UNDEF elements are still guaranteed to have their
9547   // upper bits set to zero.
9548   bool IsZext =
9549       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
9550 
9551   for (unsigned i = 0; i != NumElts; ++i) {
9552     SDValue Op = N0.getOperand(i);
9553     if (Op.isUndef()) {
9554       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
9555       continue;
9556     }
9557 
9558     SDLoc DL(Op);
9559     // Get the constant value and if needed trunc it to the size of the type.
9560     // Nodes like build_vector might have constants wider than the scalar type.
9561     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
9562     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
9563       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9564     else
9565       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9566   }
9567 
9568   return DAG.getBuildVector(VT, DL, Elts);
9569 }
9570 
9571 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
9572 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9573 // transformation. Returns true if the extensions are possible and the
9574 // above-mentioned transformation is profitable.
9575 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9576                                     unsigned ExtOpc,
9577                                     SmallVectorImpl<SDNode *> &ExtendNodes,
9578                                     const TargetLowering &TLI) {
9579   bool HasCopyToRegUses = false;
9580   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9581   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9582                             UE = N0.getNode()->use_end();
9583        UI != UE; ++UI) {
9584     SDNode *User = *UI;
9585     if (User == N)
9586       continue;
9587     if (UI.getUse().getResNo() != N0.getResNo())
9588       continue;
9589     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9590     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9591       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9592       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9593         // Sign bits will be lost after a zext.
9594         return false;
9595       bool Add = false;
9596       for (unsigned i = 0; i != 2; ++i) {
9597         SDValue UseOp = User->getOperand(i);
9598         if (UseOp == N0)
9599           continue;
9600         if (!isa<ConstantSDNode>(UseOp))
9601           return false;
9602         Add = true;
9603       }
9604       if (Add)
9605         ExtendNodes.push_back(User);
9606       continue;
9607     }
9608     // If truncates aren't free and there are users we can't
9609     // extend, it isn't worthwhile.
9610     if (!isTruncFree)
9611       return false;
9612     // Remember if this value is live-out.
9613     if (User->getOpcode() == ISD::CopyToReg)
9614       HasCopyToRegUses = true;
9615   }
9616 
9617   if (HasCopyToRegUses) {
9618     bool BothLiveOut = false;
9619     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9620          UI != UE; ++UI) {
9621       SDUse &Use = UI.getUse();
9622       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9623         BothLiveOut = true;
9624         break;
9625       }
9626     }
9627     if (BothLiveOut)
9628       // Both unextended and extended values are live out. There had better be
9629       // a good reason for the transformation.
9630       return !ExtendNodes.empty();
9631   }
9632   return true;
9633 }
9634 
9635 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9636                                   SDValue OrigLoad, SDValue ExtLoad,
9637                                   ISD::NodeType ExtType) {
9638   // Extend SetCC uses if necessary.
9639   SDLoc DL(ExtLoad);
9640   for (SDNode *SetCC : SetCCs) {
9641     SmallVector<SDValue, 4> Ops;
9642 
9643     for (unsigned j = 0; j != 2; ++j) {
9644       SDValue SOp = SetCC->getOperand(j);
9645       if (SOp == OrigLoad)
9646         Ops.push_back(ExtLoad);
9647       else
9648         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9649     }
9650 
9651     Ops.push_back(SetCC->getOperand(2));
9652     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9653   }
9654 }
9655 
9656 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
9657 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
9658   SDValue N0 = N->getOperand(0);
9659   EVT DstVT = N->getValueType(0);
9660   EVT SrcVT = N0.getValueType();
9661 
9662   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9663           N->getOpcode() == ISD::ZERO_EXTEND) &&
9664          "Unexpected node type (not an extend)!");
9665 
9666   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
9667   // For example, on a target with legal v4i32, but illegal v8i32, turn:
9668   //   (v8i32 (sext (v8i16 (load x))))
9669   // into:
9670   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
9671   //                          (v4i32 (sextload (x + 16)))))
9672   // Where uses of the original load, i.e.:
9673   //   (v8i16 (load x))
9674   // are replaced with:
9675   //   (v8i16 (truncate
9676   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
9677   //                            (v4i32 (sextload (x + 16)))))))
9678   //
9679   // This combine is only applicable to illegal, but splittable, vectors.
9680   // All legal types, and illegal non-vector types, are handled elsewhere.
9681   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9682   //
9683   if (N0->getOpcode() != ISD::LOAD)
9684     return SDValue();
9685 
9686   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9687 
9688   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
9689       !N0.hasOneUse() || !LN0->isSimple() ||
9690       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
9691       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9692     return SDValue();
9693 
9694   SmallVector<SDNode *, 4> SetCCs;
9695   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9696     return SDValue();
9697 
9698   ISD::LoadExtType ExtType =
9699       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9700 
9701   // Try to split the vector types to get down to legal types.
9702   EVT SplitSrcVT = SrcVT;
9703   EVT SplitDstVT = DstVT;
9704   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9705          SplitSrcVT.getVectorNumElements() > 1) {
9706     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9707     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9708   }
9709 
9710   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9711     return SDValue();
9712 
9713   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
9714 
9715   SDLoc DL(N);
9716   const unsigned NumSplits =
9717       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
9718   const unsigned Stride = SplitSrcVT.getStoreSize();
9719   SmallVector<SDValue, 4> Loads;
9720   SmallVector<SDValue, 4> Chains;
9721 
9722   SDValue BasePtr = LN0->getBasePtr();
9723   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
9724     const unsigned Offset = Idx * Stride;
9725     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9726 
9727     SDValue SplitLoad = DAG.getExtLoad(
9728         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9729         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9730         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9731 
9732     BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
9733 
9734     Loads.push_back(SplitLoad.getValue(0));
9735     Chains.push_back(SplitLoad.getValue(1));
9736   }
9737 
9738   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9739   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9740 
9741   // Simplify TF.
9742   // Simplify the TokenFactor.
9743 
9744   CombineTo(N, NewValue);
9745 
9746   // Replace uses of the original load (before extension)
9747   // with a truncate of the concatenated sextloaded vectors.
9748   SDValue Trunc =
9749       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9750   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9751   CombineTo(N0.getNode(), Trunc, NewChain);
9752   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9753 }
9754 
9755 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9756 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
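// E.g. (illustrative), zero-extending from i32 to i64:
//   (zext i64 (and (srl (load i32 x), 8), 255))
//     --> (and (srl (zextload i64 x), 8), 255)
// For shl the logic op must be AND (checked below): the narrow shl discards
// the bits it shifts past the top, and only an AND mask (whose zext has zero
// high bits) clears those bits again in the widened form.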
9757 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9758   assert(N->getOpcode() == ISD::ZERO_EXTEND);
9759   EVT VT = N->getValueType(0);
9760   EVT OrigVT = N->getOperand(0).getValueType();
9761   if (TLI.isZExtFree(OrigVT, VT))
9762     return SDValue();
9763 
9764   // and/or/xor
9765   SDValue N0 = N->getOperand(0);
9766   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9767         N0.getOpcode() == ISD::XOR) ||
9768       N0.getOperand(1).getOpcode() != ISD::Constant ||
9769       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
9770     return SDValue();
9771 
9772   // shl/shr
9773   SDValue N1 = N0->getOperand(0);
9774   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
9775       N1.getOperand(1).getOpcode() != ISD::Constant ||
9776       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
9777     return SDValue();
9778 
9779   // load
9780   if (!isa<LoadSDNode>(N1.getOperand(0)))
9781     return SDValue();
9782   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9783   EVT MemVT = Load->getMemoryVT();
9784   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9785       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
9786     return SDValue();
9787 
9789   // If the shift op is SHL, the logic op must be AND, otherwise the result
9790   // will be wrong.
9791   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
9792     return SDValue();
9793 
9794   if (!N0.hasOneUse() || !N1.hasOneUse())
9795     return SDValue();
9796 
9797   SmallVector<SDNode*, 4> SetCCs;
9798   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9799                                ISD::ZERO_EXTEND, SetCCs, TLI))
9800     return SDValue();
9801 
9802   // Actually do the transformation.
9803   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9804                                    Load->getChain(), Load->getBasePtr(),
9805                                    Load->getMemoryVT(), Load->getMemOperand());
9806 
9807   SDLoc DL1(N1);
9808   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
9809                               N1.getOperand(1));
9810 
9811   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
9812   SDLoc DL0(N0);
9813   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9814                             DAG.getConstant(Mask, DL0, VT));
9815 
9816   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9817   CombineTo(N, And);
9818   if (SDValue(Load, 0).hasOneUse()) {
9819     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9820   } else {
9821     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9822                                 Load->getValueType(0), ExtLoad);
9823     CombineTo(Load, Trunc, ExtLoad.getValue(1));
9824   }
9825 
9826   // N0 is dead at this point.
9827   recursivelyDeleteUnusedNodes(N0.getNode());
9828 
9829   return SDValue(N,0); // Return N so it doesn't get rechecked!
9830 }
9831 
9832 /// If we're narrowing or widening the result of a vector select and the final
9833 /// size is the same size as a setcc (compare) feeding the select, then try to
9834 /// apply the cast operation to the select's operands because matching vector
9835 /// sizes for a select condition and other operands should be more efficient.
9836 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9837   unsigned CastOpcode = Cast->getOpcode();
9838   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9839           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9840           CastOpcode == ISD::FP_ROUND) &&
9841          "Unexpected opcode for vector select narrowing/widening");
9842 
9843   // We only do this transform before legal ops because the pattern may be
9844   // obfuscated by target-specific operations after legalization. Do not create
9845   // an illegal select op, however, because that may be difficult to lower.
9846   EVT VT = Cast->getValueType(0);
9847   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9848     return SDValue();
9849 
9850   SDValue VSel = Cast->getOperand(0);
9851   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9852       VSel.getOperand(0).getOpcode() != ISD::SETCC)
9853     return SDValue();
9854 
9855   // Does the setcc have the same vector size as the casted select?
9856   SDValue SetCC = VSel.getOperand(0);
9857   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9858   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9859     return SDValue();
9860 
9861   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9862   SDValue A = VSel.getOperand(1);
9863   SDValue B = VSel.getOperand(2);
9864   SDValue CastA, CastB;
9865   SDLoc DL(Cast);
9866   if (CastOpcode == ISD::FP_ROUND) {
9867     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9868     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9869     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9870   } else {
9871     CastA = DAG.getNode(CastOpcode, DL, VT, A);
9872     CastB = DAG.getNode(CastOpcode, DL, VT, B);
9873   }
9874   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9875 }
9876 
9877 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9878 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9879 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9880                                      const TargetLowering &TLI, EVT VT,
9881                                      bool LegalOperations, SDNode *N,
9882                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
9883   SDNode *N0Node = N0.getNode();
9884   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9885                                                    : ISD::isZEXTLoad(N0Node);
9886   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9887       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9888     return SDValue();
9889 
9890   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9891   EVT MemVT = LN0->getMemoryVT();
9892   if ((LegalOperations || !LN0->isSimple() ||
9893        VT.isVector()) &&
9894       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
9895     return SDValue();
9896 
9897   SDValue ExtLoad =
9898       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9899                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9900   Combiner.CombineTo(N, ExtLoad);
9901   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9902   if (LN0->use_empty())
9903     Combiner.recursivelyDeleteUnusedNodes(LN0);
9904   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9905 }
9906 
9907 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9908 // Only generate vector extloads when 1) they're legal, and 2) they are
9909 // deemed desirable by the target.
9910 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
9911                                   const TargetLowering &TLI, EVT VT,
9912                                   bool LegalOperations, SDNode *N, SDValue N0,
9913                                   ISD::LoadExtType ExtLoadType,
9914                                   ISD::NodeType ExtOpc) {
9915   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
9916       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
9917       ((LegalOperations || VT.isVector() ||
9918         !cast<LoadSDNode>(N0)->isSimple()) &&
9919        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
9920     return {};
9921 
9922   bool DoXform = true;
9923   SmallVector<SDNode *, 4> SetCCs;
9924   if (!N0.hasOneUse())
9925     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9926   if (VT.isVector())
9927     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9928   if (!DoXform)
9929     return {};
9930 
9931   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9932   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9933                                    LN0->getBasePtr(), N0.getValueType(),
9934                                    LN0->getMemOperand());
9935   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9936   // If the load value is used only by N, replace it via CombineTo N.
9937   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9938   Combiner.CombineTo(N, ExtLoad);
9939   if (NoReplaceTrunc) {
9940     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9941     Combiner.recursivelyDeleteUnusedNodes(LN0);
9942   } else {
9943     SDValue Trunc =
9944         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9945     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9946   }
9947   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9948 }
9949 
9950 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
9951                                         const TargetLowering &TLI, EVT VT,
9952                                         SDNode *N, SDValue N0,
9953                                         ISD::LoadExtType ExtLoadType,
9954                                         ISD::NodeType ExtOpc) {
9955   if (!N0.hasOneUse())
9956     return SDValue();
9957 
9958   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
9959   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
9960     return SDValue();
9961 
9962   if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
9963     return SDValue();
9964 
9965   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9966     return SDValue();
9967 
9968   SDLoc dl(Ld);
9969   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
9970   SDValue NewLoad = DAG.getMaskedLoad(
9971       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
9972       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
9973       ExtLoadType, Ld->isExpandingLoad());
9974   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
9975   return NewLoad;
9976 }
9977 
9978 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9979                                        bool LegalOperations) {
9980   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9981           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9982 
9983   SDValue SetCC = N->getOperand(0);
9984   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9985       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9986     return SDValue();
9987 
9988   SDValue X = SetCC.getOperand(0);
9989   SDValue Ones = SetCC.getOperand(1);
9990   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9991   EVT VT = N->getValueType(0);
9992   EVT XVT = X.getValueType();
9993   // setge X, C is canonicalized to setgt, so we do not need to match that
9994   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9995   // not require the 'not' op.
9996   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9997     // Invert and smear/shift the sign bit:
9998     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9999     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
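    // E.g. for i8 (illustrative): X is non-negative exactly when the sign bit
    // of (not X) is set, so smearing that bit across the value yields all-ones
    // (sra) or 1 (srl) precisely when (setgt X, -1) is true.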
10000     SDLoc DL(N);
10001     unsigned ShCt = VT.getSizeInBits() - 1;
10002     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10003     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10004       SDValue NotX = DAG.getNOT(DL, X, VT);
10005       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10006       auto ShiftOpcode =
10007         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10008       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10009     }
10010   }
10011   return SDValue();
10012 }
10013 
10014 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
10015   SDValue N0 = N->getOperand(0);
10016   EVT VT = N->getValueType(0);
10017   SDLoc DL(N);
10018 
10019   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10020     return Res;
10021 
10022   // fold (sext (sext x)) -> (sext x)
10023   // fold (sext (aext x)) -> (sext x)
10024   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10025     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
10026 
10027   if (N0.getOpcode() == ISD::TRUNCATE) {
10028     // fold (sext (truncate (load x))) -> (sext (smaller load x))
10029     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
10030     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10031       SDNode *oye = N0.getOperand(0).getNode();
10032       if (NarrowLoad.getNode() != N0.getNode()) {
10033         CombineTo(N0.getNode(), NarrowLoad);
10034         // CombineTo deleted the truncate, if needed, but not what's under it.
10035         AddToWorklist(oye);
10036       }
10037       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10038     }
10039 
10040     // See if the value being truncated is already sign extended.  If so, just
10041     // eliminate the trunc/sext pair.
10042     SDValue Op = N0.getOperand(0);
10043     unsigned OpBits   = Op.getScalarValueSizeInBits();
10044     unsigned MidBits  = N0.getScalarValueSizeInBits();
10045     unsigned DestBits = VT.getScalarSizeInBits();
10046     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
10047 
10048     if (OpBits == DestBits) {
10049       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
10050       // bits, it is already properly sign-extended.
10051       if (NumSignBits > DestBits-MidBits)
10052         return Op;
10053     } else if (OpBits < DestBits) {
10054       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
10055       // bits, just sext from i32.
10056       if (NumSignBits > OpBits-MidBits)
10057         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
10058     } else {
10059       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
10060       // bits, just truncate to i32.
10061       if (NumSignBits > OpBits-MidBits)
10062         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
10063     }
10064 
10065     // fold (sext (truncate x)) -> (sextinreg x).
10066     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
10067                                                  N0.getValueType())) {
10068       if (OpBits < DestBits)
10069         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
10070       else if (OpBits > DestBits)
10071         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
10072       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
10073                          DAG.getValueType(N0.getValueType()));
10074     }
10075   }
10076 
10077   // Try to simplify (sext (load x)).
10078   if (SDValue foldedExt =
10079           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10080                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
10081     return foldedExt;
10082 
10083   if (SDValue foldedExt =
10084       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
10085                                ISD::SIGN_EXTEND))
10086     return foldedExt;
10087 
10088   // fold (sext (load x)) to multiple smaller sextloads.
10089   // Only on illegal but splittable vectors.
10090   if (SDValue ExtLoad = CombineExtLoad(N))
10091     return ExtLoad;
10092 
10093   // Try to simplify (sext (sextload x)).
10094   if (SDValue foldedExt = tryToFoldExtOfExtload(
10095           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
10096     return foldedExt;
10097 
10098   // fold (sext (and/or/xor (load x), cst)) ->
10099   //      (and/or/xor (sextload x), (sext cst))
10100   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10101        N0.getOpcode() == ISD::XOR) &&
10102       isa<LoadSDNode>(N0.getOperand(0)) &&
10103       N0.getOperand(1).getOpcode() == ISD::Constant &&
10104       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10105     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10106     EVT MemVT = LN00->getMemoryVT();
10107     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
10108       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
10109       SmallVector<SDNode*, 4> SetCCs;
10110       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10111                                              ISD::SIGN_EXTEND, SetCCs, TLI);
10112       if (DoXform) {
10113         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
10114                                          LN00->getChain(), LN00->getBasePtr(),
10115                                          LN00->getMemoryVT(),
10116                                          LN00->getMemOperand());
10117         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
10118         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10119                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
10120         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
10121         bool NoReplaceTruncAnd = !N0.hasOneUse();
10122         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10123         CombineTo(N, And);
10124         // If N0 has multiple uses, change other uses as well.
10125         if (NoReplaceTruncAnd) {
10126           SDValue TruncAnd =
10127               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10128           CombineTo(N0.getNode(), TruncAnd);
10129         }
10130         if (NoReplaceTrunc) {
10131           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10132         } else {
10133           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10134                                       LN00->getValueType(0), ExtLoad);
10135           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10136         }
10137         return SDValue(N,0); // Return N so it doesn't get rechecked!
10138       }
10139     }
10140   }
10141 
10142   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10143     return V;
10144 
10145   if (N0.getOpcode() == ISD::SETCC) {
10146     SDValue N00 = N0.getOperand(0);
10147     SDValue N01 = N0.getOperand(1);
10148     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10149     EVT N00VT = N0.getOperand(0).getValueType();
10150 
10151     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
10152     // Only do this before legalize for now.
10153     if (VT.isVector() && !LegalOperations &&
10154         TLI.getBooleanContents(N00VT) ==
10155             TargetLowering::ZeroOrNegativeOneBooleanContent) {
10156       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10157       // of the same size as the compared operands. Only optimize sext(setcc())
10158       // if this is the case.
10159       EVT SVT = getSetCCResultType(N00VT);
10160 
10161       // If we already have the desired type, don't change it.
10162       if (SVT != N0.getValueType()) {
10163         // We know that the # elements of the results is the same as the
10164         // # elements of the compare (and the # elements of the compare result
10165         // for that matter).  Check to see that they are the same size.  If so,
10166         // we know that the element size of the sext'd result matches the
10167         // element size of the compare operands.
10168         if (VT.getSizeInBits() == SVT.getSizeInBits())
10169           return DAG.getSetCC(DL, VT, N00, N01, CC);
10170 
10171         // If the desired elements are smaller or larger than the source
10172         // elements, we can use a matching integer vector type and then
10173         // truncate/sign extend.
10174         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10175         if (SVT == MatchingVecType) {
10176           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10177           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10178         }
10179       }
10180     }
10181 
10182     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
10183     // Here, T can be 1 or -1, depending on the type of the setcc and
10184     // getBooleanContents().
10185     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
10186 
10187     // To determine the "true" side of the select, we need to know the high bit
10188     // of the value returned by the setcc if it evaluates to true.
10189     // If the type of the setcc is i1, then the true case of the select is just
10190     // sext(i1 1), that is, -1.
10191     // If the type of the setcc is larger (say, i8) then the value of the high
10192     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
10193     // of the appropriate width.
10194     SDValue ExtTrueVal = (SetCCWidth == 1)
10195                              ? DAG.getAllOnesConstant(DL, VT)
10196                              : DAG.getBoolConstant(true, DL, VT, N00VT);
10197     SDValue Zero = DAG.getConstant(0, DL, VT);
10198     if (SDValue SCC =
10199             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
10200       return SCC;
10201 
10202     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
10203       EVT SetCCVT = getSetCCResultType(N00VT);
10204       // Don't do this transform for i1 because there's a select transform
10205       // that would reverse it.
10206       // TODO: We should not do this transform at all without a target hook
10207       // because a sext is likely cheaper than a select?
10208       if (SetCCVT.getScalarSizeInBits() != 1 &&
10209           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
10210         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
10211         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
10212       }
10213     }
10214   }
10215 
10216   // fold (sext x) -> (zext x) if the sign bit is known zero.
10217   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
10218       DAG.SignBitIsZero(N0))
10219     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
10220 
10221   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10222     return NewVSel;
10223 
10224   // Eliminate this sign extend by doing a negation in the destination type:
10225   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
10226   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
10227       isNullOrNullSplat(N0.getOperand(0)) &&
10228       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
10229       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
10230     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
10231     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
10232   }
10233   // Eliminate this sign extend by doing a decrement in the destination type:
10234   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
10235   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
10236       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
10237       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10238       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
10239     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
10240     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10241   }
10242 
10243   return SDValue();
10244 }
10245 
10246 // isTruncateOf - If N is a truncate of some other value, return true and record
10247 // the value being truncated in Op and which of Op's bits are zero/one in Known.
10248 // This function computes KnownBits to avoid a duplicated call to
10249 // computeKnownBits in the caller.
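// Besides a plain TRUNCATE, this also matches (setcc ne X, 0) when X is
// known to be either 0 or 1; such a compare acts as a truncation to i1.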
10250 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
10251                          KnownBits &Known) {
10252   if (N->getOpcode() == ISD::TRUNCATE) {
10253     Op = N->getOperand(0);
10254     Known = DAG.computeKnownBits(Op);
10255     return true;
10256   }
10257 
10258   if (N.getOpcode() != ISD::SETCC ||
10259       N.getValueType().getScalarType() != MVT::i1 ||
10260       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
10261     return false;
10262 
10263   SDValue Op0 = N->getOperand(0);
10264   SDValue Op1 = N->getOperand(1);
10265   assert(Op0.getValueType() == Op1.getValueType());
10266 
10267   if (isNullOrNullSplat(Op0))
10268     Op = Op1;
10269   else if (isNullOrNullSplat(Op1))
10270     Op = Op0;
10271   else
10272     return false;
10273 
10274   Known = DAG.computeKnownBits(Op);
10275 
10276   return (Known.Zero | 1).isAllOnesValue();
10277 }
10278 
10279 /// Given an extending node with a pop-count operand, if the target does not
10280 /// support a pop-count in the narrow source type but does support it in the
10281 /// destination type, widen the pop-count to the destination type.
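/// The rewrite (zext (ctpop X)) --> (ctpop (zext X)) is safe because zero
/// extension introduces no extra set bits, so the population count is
/// unchanged.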
10282 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10283   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
10284           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
10285 
10286   SDValue CtPop = Extend->getOperand(0);
10287   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
10288     return SDValue();
10289 
10290   EVT VT = Extend->getValueType(0);
10291   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10292   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
10293       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
10294     return SDValue();
10295 
10296   // zext (ctpop X) --> ctpop (zext X)
10297   SDLoc DL(Extend);
10298   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
10299   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
10300 }
10301 
10302 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
10303   SDValue N0 = N->getOperand(0);
10304   EVT VT = N->getValueType(0);
10305 
10306   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10307     return Res;
10308 
10309   // fold (zext (zext x)) -> (zext x)
10310   // fold (zext (aext x)) -> (zext x)
10311   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10312     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
10313                        N0.getOperand(0));
10314 
10315   // fold (zext (truncate x)) -> (zext x) or
10316   //      (zext (truncate x)) -> (truncate x)
10317   // This is valid when the truncated bits of x are already zero.
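  // E.g. (illustrative): if x is an i32 whose top 24 bits are known zero,
  //   (zext i32 (trunc x to i8)) is simply x.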
10318   SDValue Op;
10319   KnownBits Known;
10320   if (isTruncateOf(DAG, N0, Op, Known)) {
10321     APInt TruncatedBits =
10322       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
10323       APInt(Op.getScalarValueSizeInBits(), 0) :
10324       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
10325                         N0.getScalarValueSizeInBits(),
10326                         std::min(Op.getScalarValueSizeInBits(),
10327                                  VT.getScalarSizeInBits()));
10328     if (TruncatedBits.isSubsetOf(Known.Zero))
10329       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10330   }
10331 
10332   // fold (zext (truncate x)) -> (and x, mask)
10333   if (N0.getOpcode() == ISD::TRUNCATE) {
10334     // fold (zext (truncate (load x))) -> (zext (smaller load x))
10335     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
10336     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10337       SDNode *oye = N0.getOperand(0).getNode();
10338       if (NarrowLoad.getNode() != N0.getNode()) {
10339         CombineTo(N0.getNode(), NarrowLoad);
10340         // CombineTo deleted the truncate, if needed, but not what's under it.
10341         AddToWorklist(oye);
10342       }
10343       return SDValue(N, 0); // Return N so it doesn't get rechecked!
10344     }
10345 
10346     EVT SrcVT = N0.getOperand(0).getValueType();
10347     EVT MinVT = N0.getValueType();
10348 
10349     // Try to mask before the extension to avoid having to generate a larger mask,
10350     // possibly over several sub-vectors.
10351     if (SrcVT.bitsLT(VT) && VT.isVector()) {
10352       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
10353                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
10354         SDValue Op = N0.getOperand(0);
10355         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
10356         AddToWorklist(Op.getNode());
10357         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10358         // Transfer the debug info; the new node is equivalent to N0.
10359         DAG.transferDbgValues(N0, ZExtOrTrunc);
10360         return ZExtOrTrunc;
10361       }
10362     }
10363 
10364     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
10365       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10366       AddToWorklist(Op.getNode());
10367       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
10368       // We may safely transfer the debug info describing the truncate node over
10369       // to the equivalent and operation.
10370       DAG.transferDbgValues(N0, And);
10371       return And;
10372     }
10373   }
10374 
10375   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
10376   // if either of the casts is not free.
10377   if (N0.getOpcode() == ISD::AND &&
10378       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10379       N0.getOperand(1).getOpcode() == ISD::Constant &&
10380       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10381                            N0.getValueType()) ||
10382        !TLI.isZExtFree(N0.getValueType(), VT))) {
10383     SDValue X = N0.getOperand(0).getOperand(0);
10384     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
10385     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10386     SDLoc DL(N);
10387     return DAG.getNode(ISD::AND, DL, VT,
10388                        X, DAG.getConstant(Mask, DL, VT));
10389   }
10390 
10391   // Try to simplify (zext (load x)).
10392   if (SDValue foldedExt =
10393           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10394                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
10395     return foldedExt;
10396 
10397   if (SDValue foldedExt =
10398       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
10399                                ISD::ZERO_EXTEND))
10400     return foldedExt;
10401 
10402   // fold (zext (load x)) to multiple smaller zextloads.
10403   // Only on illegal but splittable vectors.
10404   if (SDValue ExtLoad = CombineExtLoad(N))
10405     return ExtLoad;
10406 
10407   // fold (zext (and/or/xor (load x), cst)) ->
10408   //      (and/or/xor (zextload x), (zext cst))
10409   // Unless (and (load x) cst) will match as a zextload already and has
10410   // additional users.
10411   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10412        N0.getOpcode() == ISD::XOR) &&
10413       isa<LoadSDNode>(N0.getOperand(0)) &&
10414       N0.getOperand(1).getOpcode() == ISD::Constant &&
10415       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10416     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10417     EVT MemVT = LN00->getMemoryVT();
10418     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
10419         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
10420       bool DoXform = true;
10421       SmallVector<SDNode*, 4> SetCCs;
10422       if (!N0.hasOneUse()) {
10423         if (N0.getOpcode() == ISD::AND) {
10424           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
10425           EVT LoadResultTy = AndC->getValueType(0);
10426           EVT ExtVT;
10427           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
10428             DoXform = false;
10429         }
10430       }
10431       if (DoXform)
10432         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10433                                           ISD::ZERO_EXTEND, SetCCs, TLI);
10434       if (DoXform) {
10435         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
10436                                          LN00->getChain(), LN00->getBasePtr(),
10437                                          LN00->getMemoryVT(),
10438                                          LN00->getMemOperand());
10439         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10440         SDLoc DL(N);
10441         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10442                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
10443         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10444         bool NoReplaceTruncAnd = !N0.hasOneUse();
10445         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10446         CombineTo(N, And);
10447         // If N0 has multiple uses, change other uses as well.
10448         if (NoReplaceTruncAnd) {
10449           SDValue TruncAnd =
10450               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10451           CombineTo(N0.getNode(), TruncAnd);
10452         }
10453         if (NoReplaceTrunc) {
10454           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10455         } else {
10456           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10457                                       LN00->getValueType(0), ExtLoad);
10458           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10459         }
10460         return SDValue(N,0); // Return N so it doesn't get rechecked!
10461       }
10462     }
10463   }
10464 
10465   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10466   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10467   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
10468     return ZExtLoad;
10469 
10470   // Try to simplify (zext (zextload x)).
10471   if (SDValue foldedExt = tryToFoldExtOfExtload(
10472           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
10473     return foldedExt;
10474 
10475   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10476     return V;
10477 
10478   if (N0.getOpcode() == ISD::SETCC) {
10479     // Only do this before legalize for now.
10480     if (!LegalOperations && VT.isVector() &&
10481         N0.getValueType().getVectorElementType() == MVT::i1) {
10482       EVT N00VT = N0.getOperand(0).getValueType();
10483       if (getSetCCResultType(N00VT) == N0.getValueType())
10484         return SDValue();
10485 
10486       // We know that the # elements of the result is the same as the #
10487       // elements of the compare (and the # elements of the compare result for
10488       // that matter). Check to see that they are the same size. If so, we know
10489       // that the element size of the zext'd result matches the element size of
10490       // the compare operands.
10491       SDLoc DL(N);
10492       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
10493         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
10494         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
10495                                      N0.getOperand(1), N0.getOperand(2));
10496         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
10497       }
10498 
10499       // If the desired elements are smaller or larger than the source
10500       // elements we can use a matching integer vector type and then
10501       // truncate/any extend followed by zext_in_reg.
10502       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10503       SDValue VsetCC =
10504           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
10505                       N0.getOperand(1), N0.getOperand(2));
10506       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
10507                                     N0.getValueType());
10508     }
10509 
10510     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10511     SDLoc DL(N);
10512     if (SDValue SCC = SimplifySelectCC(
10513             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10514             DAG.getConstant(0, DL, VT),
10515             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10516       return SCC;
10517   }
10518 
10519   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
10520   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10521       isa<ConstantSDNode>(N0.getOperand(1)) &&
10522       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10523       N0.hasOneUse()) {
10524     SDValue ShAmt = N0.getOperand(1);
10525     if (N0.getOpcode() == ISD::SHL) {
10526       SDValue InnerZExt = N0.getOperand(0);
10527       // If the original shl may be shifting out bits, do not perform this
10528       // transformation.
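      // (The inner zext guarantees that the top KnownZeroBits bits are zero,
      // so a shift amount of at most KnownZeroBits cannot shift out set bits.)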
10529       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
10530         InnerZExt.getOperand(0).getValueSizeInBits();
10531       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
10532         return SDValue();
10533     }
10534 
10535     SDLoc DL(N);
10536 
10537     // Ensure that the shift amount is wide enough for the shifted value.
10538     if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
10539       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
10540 
10541     return DAG.getNode(N0.getOpcode(), DL, VT,
10542                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
10543                        ShAmt);
10544   }
10545 
10546   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10547     return NewVSel;
10548 
10549   if (SDValue NewCtPop = widenCtPop(N, DAG))
10550     return NewCtPop;
10551 
10552   return SDValue();
10553 }
10554 
10555 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
10556   SDValue N0 = N->getOperand(0);
10557   EVT VT = N->getValueType(0);
10558 
10559   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10560     return Res;
10561 
10562   // fold (aext (aext x)) -> (aext x)
10563   // fold (aext (zext x)) -> (zext x)
10564   // fold (aext (sext x)) -> (sext x)
10565   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
10566       N0.getOpcode() == ISD::ZERO_EXTEND ||
10567       N0.getOpcode() == ISD::SIGN_EXTEND)
10568     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10569 
10570   // fold (aext (truncate (load x))) -> (aext (smaller load x))
10571   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
10572   if (N0.getOpcode() == ISD::TRUNCATE) {
10573     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10574       SDNode *oye = N0.getOperand(0).getNode();
10575       if (NarrowLoad.getNode() != N0.getNode()) {
10576         CombineTo(N0.getNode(), NarrowLoad);
10577         // CombineTo deleted the truncate, if needed, but not what's under it.
10578         AddToWorklist(oye);
10579       }
10580       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10581     }
10582   }
10583 
10584   // fold (aext (truncate x))
10585   if (N0.getOpcode() == ISD::TRUNCATE)
10586     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10587 
10588   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
10589   // if the trunc is not free.
10590   if (N0.getOpcode() == ISD::AND &&
10591       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10592       N0.getOperand(1).getOpcode() == ISD::Constant &&
10593       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10594                           N0.getValueType())) {
10595     SDLoc DL(N);
10596     SDValue X = N0.getOperand(0).getOperand(0);
10597     X = DAG.getAnyExtOrTrunc(X, DL, VT);
10598     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10599     return DAG.getNode(ISD::AND, DL, VT,
10600                        X, DAG.getConstant(Mask, DL, VT));
10601   }
10602 
10603   // fold (aext (load x)) -> (aext (truncate (extload x)))
10604   // None of the supported targets knows how to perform load and any_ext
10605   // on vectors in one instruction.  We only perform this transformation on
10606   // scalars.
10607   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
10608       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10609       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10610     bool DoXform = true;
10611     SmallVector<SDNode*, 4> SetCCs;
10612     if (!N0.hasOneUse())
10613       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
10614                                         TLI);
10615     if (DoXform) {
10616       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10617       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10618                                        LN0->getChain(),
10619                                        LN0->getBasePtr(), N0.getValueType(),
10620                                        LN0->getMemOperand());
10621       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
10622       // If the load value is used only by N, replace it via CombineTo N.
10623       bool NoReplaceTrunc = N0.hasOneUse();
10624       CombineTo(N, ExtLoad);
10625       if (NoReplaceTrunc) {
10626         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10627         recursivelyDeleteUnusedNodes(LN0);
10628       } else {
10629         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10630                                     N0.getValueType(), ExtLoad);
10631         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10632       }
10633       return SDValue(N, 0); // Return N so it doesn't get rechecked!
10634     }
10635   }
10636 
10637   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10638   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10639   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
10640   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
10641       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
10642     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10643     ISD::LoadExtType ExtType = LN0->getExtensionType();
10644     EVT MemVT = LN0->getMemoryVT();
10645     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
10646       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10647                                        VT, LN0->getChain(), LN0->getBasePtr(),
10648                                        MemVT, LN0->getMemOperand());
10649       CombineTo(N, ExtLoad);
10650       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10651       recursivelyDeleteUnusedNodes(LN0);
10652       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10653     }
10654   }
10655 
10656   if (N0.getOpcode() == ISD::SETCC) {
10657     // For vectors:
10658     // aext(setcc) -> vsetcc
10659     // aext(setcc) -> truncate(vsetcc)
10660     // aext(setcc) -> aext(vsetcc)
10661     // Only do this before legalize for now.
10662     if (VT.isVector() && !LegalOperations) {
10663       EVT N00VT = N0.getOperand(0).getValueType();
10664       if (getSetCCResultType(N00VT) == N0.getValueType())
10665         return SDValue();
10666 
10667       // We know that the # elements of the result is the same as the
10668       // # elements of the compare (and the # elements of the compare result
10669       // for that matter).  Check to see that they are the same size.  If so,
10670       // we know that the element size of the extended result matches the
10671       // element size of the compare operands.
10672       if (VT.getSizeInBits() == N00VT.getSizeInBits())
10673         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10674                              N0.getOperand(1),
10675                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
10676 
10677       // If the desired elements are smaller or larger than the source
10678       // elements we can use a matching integer vector type and then
10679       // truncate/any extend
10680       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10681       SDValue VsetCC =
10682         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10683                       N0.getOperand(1),
10684                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
10685       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10686     }
10687 
10688     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10689     SDLoc DL(N);
10690     if (SDValue SCC = SimplifySelectCC(
10691             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10692             DAG.getConstant(0, DL, VT),
10693             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10694       return SCC;
10695   }
10696 
10697   if (SDValue NewCtPop = widenCtPop(N, DAG))
10698     return NewCtPop;
10699 
10700   return SDValue();
10701 }
10702 
10703 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10704   unsigned Opcode = N->getOpcode();
10705   SDValue N0 = N->getOperand(0);
10706   SDValue N1 = N->getOperand(1);
10707   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10708 
10709   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10710   if (N0.getOpcode() == Opcode &&
10711       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10712     return N0;
10713 
10714   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10715       N0.getOperand(0).getOpcode() == Opcode) {
10716     // We have an assert, truncate, assert sandwich. Make one stronger assert
10717     // by applying the smallest asserted type to the larger source value.
10718     // This eliminates the later assert:
10719     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10720     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10721     SDValue BigA = N0.getOperand(0);
10722     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10723     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10724            "Asserting zero/sign-extended bits to a type larger than the "
10725            "truncated destination does not provide information");
10726 
10727     SDLoc DL(N);
10728     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10729     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10730     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10731                                     BigA.getOperand(0), MinAssertVTVal);
10732     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10733   }
10734 
10735   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10736   // than X, just move the AssertZext in front of the truncate and drop the
10737   // AssertSext.
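  // For example (illustrative), with X : i64:
  //   (AssertZext (trunc (AssertSext X, i32) to i32), i8)
  //     -> (trunc (AssertZext X, i8) to i32)
  // Knowing that bits above bit 7 are zero subsumes the sign-extension info.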
10738   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10739       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10740       Opcode == ISD::AssertZext) {
10741     SDValue BigA = N0.getOperand(0);
10742     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10743     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10744            "Asserting zero/sign-extended bits to a type larger than the "
10745            "truncated destination does not provide information");
10746 
10747     if (AssertVT.bitsLT(BigA_AssertVT)) {
10748       SDLoc DL(N);
10749       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10750                                       BigA.getOperand(0), N1);
10751       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10752     }
10753   }
10754 
10755   return SDValue();
10756 }
10757 
10758 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
10759   SDLoc DL(N);
10760 
10761   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
10762   SDValue N0 = N->getOperand(0);
10763 
10764   // Fold (assertalign (assertalign x, AL0), AL1) ->
10765   // (assertalign x, max(AL0, AL1))
10766   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
10767     return DAG.getAssertAlign(DL, N0.getOperand(0),
10768                               std::max(AL, AAN->getAlign()));
10769 
10770   // In rare cases, there are trivial arithmetic ops in source operands. Sink
10771   // this assert down to the source operands so that those arithmetic ops can
10772   // be exposed to DAG combining.
10773   switch (N0.getOpcode()) {
10774   default:
10775     break;
10776   case ISD::ADD:
10777   case ISD::SUB: {
10778     unsigned AlignShift = Log2(AL);
10779     SDValue LHS = N0.getOperand(0);
10780     SDValue RHS = N0.getOperand(1);
10781     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
10782     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10783     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
10784       if (LHSAlignShift < AlignShift)
10785         LHS = DAG.getAssertAlign(DL, LHS, AL);
10786       if (RHSAlignShift < AlignShift)
10787         RHS = DAG.getAssertAlign(DL, RHS, AL);
10788       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
10789     }
10790     break;
10791   }
10792   }
10793 
10794   return SDValue();
10795 }
10796 
10797 /// If the result of a wider load is shifted right by N bits and then
10798 /// truncated to a narrower type, where N is a multiple of the number of bits
10799 /// of the narrower type, transform it to a narrower load from address + N /
10800 /// (num of bits of the new type). Also narrow the load if the result is masked
10801 /// with an AND to effectively produce a smaller type. If the result is to be
10802 /// extended, also fold the extension to form an extending load.
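/// For example (illustrative, little-endian):
///   (i8 (trunc (srl (i32 (load p)), 16))) -> (i8 (load p+2))
/// since the shift amount 16 is a multiple of the narrow width 8.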
10803 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10804   unsigned Opc = N->getOpcode();
10805 
10806   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10807   SDValue N0 = N->getOperand(0);
10808   EVT VT = N->getValueType(0);
10809   EVT ExtVT = VT;
10810 
10811   // This transformation isn't valid for vector loads.
10812   if (VT.isVector())
10813     return SDValue();
10814 
10815   unsigned ShAmt = 0;
10816   bool HasShiftedOffset = false;
10817   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10818   // sign-extending back to VT.
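  // E.g. (illustrative): (sext_inreg (load p), i8) is handled as an i8
  // sextload of p whose result is sign-extended back to VT.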
10819   if (Opc == ISD::SIGN_EXTEND_INREG) {
10820     ExtType = ISD::SEXTLOAD;
10821     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10822   } else if (Opc == ISD::SRL) {
10823     // Another special case: an SRL is basically zero-extending a narrower
10824     // value, or it may be shifting a higher subword, half or byte into the
10825     // lowest bits.
10826     ExtType = ISD::ZEXTLOAD;
10827     N0 = SDValue(N, 0);
10828 
10829     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10830     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10831     if (!N01 || !LN0)
10832       return SDValue();
10833 
10834     uint64_t ShiftAmt = N01->getZExtValue();
10835     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10836     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10837       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10838     else
10839       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10840                                 VT.getSizeInBits() - ShiftAmt);
10841   } else if (Opc == ISD::AND) {
10842     // An AND with a constant mask is the same as a truncate + zero-extend.
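    // E.g. (illustrative): (and (i32 (load p)), 0xffff) behaves like
    // (zext (trunc (load p) to i16) to i32), so it may narrow to an i16
    // zextload.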
10843     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10844     if (!AndC)
10845       return SDValue();
10846 
10847     const APInt &Mask = AndC->getAPIntValue();
10848     unsigned ActiveBits = 0;
10849     if (Mask.isMask()) {
10850       ActiveBits = Mask.countTrailingOnes();
10851     } else if (Mask.isShiftedMask()) {
10852       ShAmt = Mask.countTrailingZeros();
10853       APInt ShiftedMask = Mask.lshr(ShAmt);
10854       ActiveBits = ShiftedMask.countTrailingOnes();
10855       HasShiftedOffset = true;
10856     } else
10857       return SDValue();
10858 
10859     ExtType = ISD::ZEXTLOAD;
10860     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10861   }
10862 
10863   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10864     SDValue SRL = N0;
10865     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10866       ShAmt = ConstShift->getZExtValue();
10867       unsigned EVTBits = ExtVT.getSizeInBits();
10868       // Is the shift amount a multiple of the size of ExtVT?
10869       if ((ShAmt & (EVTBits-1)) == 0) {
10870         N0 = N0.getOperand(0);
10871         // Is the load width a multiple of the size of ExtVT?
10872         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10873           return SDValue();
10874       }
10875 
10876       // At this point, we must have a load or else we can't do the transform.
10877       auto *LN0 = dyn_cast<LoadSDNode>(N0);
10878       if (!LN0) return SDValue();
10879 
10880       // Because a SRL must be assumed to *need* to zero-extend the high bits
10881       // (as opposed to anyext the high bits), we can't combine the zextload
10882       // lowering of SRL and an sextload.
10883       if (LN0->getExtensionType() == ISD::SEXTLOAD)
10884         return SDValue();
10885 
10886       // If the shift amount is larger than the input type then we're not
10887       // accessing any of the loaded bytes.  If the load was a zextload/extload
10888       // then the result of the shift+trunc is zero/undef (handled elsewhere).
10889       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10890         return SDValue();
10891 
10892       // If the SRL is only used by a masking AND, we may be able to adjust
10893       // the ExtVT to make the AND redundant.
10894       SDNode *Mask = *(SRL->use_begin());
10895       if (Mask->getOpcode() == ISD::AND &&
10896           isa<ConstantSDNode>(Mask->getOperand(1))) {
10897         const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
10898         if (ShiftMask.isMask()) {
10899           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10900                                            ShiftMask.countTrailingOnes());
10901           // If the mask is smaller, recompute the type.
10902           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10903               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10904             ExtVT = MaskedVT;
10905         }
10906       }
10907     }
10908   }
10909 
10910   // If the load is shifted left (and the result isn't shifted back right),
10911   // we can fold the truncate through the shift.
10912   unsigned ShLeftAmt = 0;
10913   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10914       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10915     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10916       ShLeftAmt = N01->getZExtValue();
10917       N0 = N0.getOperand(0);
10918     }
10919   }
10920 
10921   // If we haven't found a load, we can't narrow it.
10922   if (!isa<LoadSDNode>(N0))
10923     return SDValue();
10924 
10925   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10926   // Reducing the width of a volatile load is illegal.  For atomics, we may be
10927   // able to reduce the width provided we never widen again. (see D66309)
10928   if (!LN0->isSimple() ||
10929       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10930     return SDValue();
10931 
10932   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10933     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10934     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10935     return LVTStoreBits - EVTStoreBits - ShAmt;
10936   };
10937 
10938   // For big endian targets, we need to adjust the offset to the pointer to
10939   // load the correct bytes.
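  // E.g. (illustrative): narrowing a 32-bit load to 8 bits with ShAmt == 0
  // must load from byte offset (32 - 8 - 0) / 8 == 3 on big-endian.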
10940   if (DAG.getDataLayout().isBigEndian())
10941     ShAmt = AdjustBigEndianShift(ShAmt);
10942 
10943   uint64_t PtrOff = ShAmt / 8;
10944   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10945   SDLoc DL(LN0);
10946   // The original load itself didn't wrap, so an offset within it doesn't.
10947   SDNodeFlags Flags;
10948   Flags.setNoUnsignedWrap(true);
10949   SDValue NewPtr =
10950       DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
10951   AddToWorklist(NewPtr.getNode());
10952 
10953   SDValue Load;
10954   if (ExtType == ISD::NON_EXTLOAD)
10955     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
10956                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10957                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10958   else
10959     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
10960                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10961                           NewAlign, LN0->getMemOperand()->getFlags(),
10962                           LN0->getAAInfo());
10963 
10964   // Replace the old load's chain with the new load's chain.
10965   WorklistRemover DeadNodes(*this);
10966   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10967 
10968   // Shift the result left, if we've swallowed a left shift.
10969   SDValue Result = Load;
10970   if (ShLeftAmt != 0) {
10971     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10972     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10973       ShImmTy = VT;
10974     // If the shift amount is as large as the result size (but, presumably,
10975     // no larger than the source) then the useful bits of the result are
10976     // zero; we can't simply return the shortened shift, because the result
10977     // of that operation is undefined.
10978     if (ShLeftAmt >= VT.getSizeInBits())
10979       Result = DAG.getConstant(0, DL, VT);
10980     else
10981       Result = DAG.getNode(ISD::SHL, DL, VT,
10982                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10983   }
10984 
10985   if (HasShiftedOffset) {
10986     // Recalculate the shift amount after it has been altered to calculate
10987     // the offset.
10988     if (DAG.getDataLayout().isBigEndian())
10989       ShAmt = AdjustBigEndianShift(ShAmt);
10990 
10991     // We're using a shifted mask, so the load now has an offset. This means
10992     // the data has been loaded into lower bytes than it would have been
10993     // before, so we need to shl the loaded data into the correct position
10994     // in the register.
10995     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10996     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10997     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
10998   }
10999 
11000   // Return the new loaded value.
11001   return Result;
11002 }
11003 
11004 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11005   SDValue N0 = N->getOperand(0);
11006   SDValue N1 = N->getOperand(1);
11007   EVT VT = N->getValueType(0);
11008   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11009   unsigned VTBits = VT.getScalarSizeInBits();
11010   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11011 
11012   // sext_in_reg(undef) = 0 because the top bits will all be the same.
11013   if (N0.isUndef())
11014     return DAG.getConstant(0, SDLoc(N), VT);
11015 
11016   // fold (sext_in_reg c1) -> c1
11017   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11018     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11019 
11020   // If the input is already sign extended, just drop the extension.
11021   if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11022     return N0;
11023 
11024   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT)
11025   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11026       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11027     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11028                        N1);
11029 
11030   // fold (sext_in_reg (sext x)) -> (sext x)
11031   // fold (sext_in_reg (aext x)) -> (sext x)
11032   // if x is small enough or if we know that x has more than 1 sign bit and the
11033   // sign_extend_inreg is extending from one of them.
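  // E.g. (illustrative): (sext_in_reg (sext i8:x to i32), i16) is already
  // sign-extended past bit 15, so it folds to (sext i8:x to i32).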
11034   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
11035     SDValue N00 = N0.getOperand(0);
11036     unsigned N00Bits = N00.getScalarValueSizeInBits();
11037     if ((N00Bits <= ExtVTBits ||
11038          (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11039         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11040       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11041   }
11042 
11043   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11044   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
11045        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
11046        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
11047       N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
11048     if (!LegalOperations ||
11049         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
11050       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
11051                          N0.getOperand(0));
11052   }
11053 
11054   // fold (sext_in_reg (zext x)) -> (sext x)
11055   // iff we are extending the source sign bit.
11056   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
11057     SDValue N00 = N0.getOperand(0);
11058     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
11059         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11060       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11061   }
11062 
11063   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
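  // E.g. (illustrative): (sext_in_reg (and x, 0x7f), i8) has bit 7 known
  // zero, so it can become a zero-extend-in-reg (an and with 0xff).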
11064   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
11065     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
11066 
11067   // fold operands of sext_in_reg based on knowledge that the top bits are not
11068   // demanded.
11069   if (SimplifyDemandedBits(SDValue(N, 0)))
11070     return SDValue(N, 0);
11071 
11072   // fold (sext_in_reg (load x)) -> (smaller sextload x)
11073   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
11074   if (SDValue NarrowLoad = ReduceLoadWidth(N))
11075     return NarrowLoad;
11076 
11077   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
11078   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
11079   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
11080   if (N0.getOpcode() == ISD::SRL) {
11081     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
11082       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
11083         // We can turn this into an SRA iff the input to the SRL is already sign
11084         // extended enough.
11085         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
11086         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
11087           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
11088                              N0.getOperand(1));
11089       }
11090   }
11091 
11092   // fold (sext_inreg (extload x)) -> (sextload x)
11093   // If sextload is not supported by the target, we can only do the combine
11094   // when the load has one use. Doing otherwise can block folding the extload
11095   // with other extends that the target does support.
11096   if (ISD::isEXTLoad(N0.getNode()) &&
11097       ISD::isUNINDEXEDLoad(N0.getNode()) &&
11098       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11099       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
11100         N0.hasOneUse()) ||
11101        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11102     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11103     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11104                                      LN0->getChain(),
11105                                      LN0->getBasePtr(), ExtVT,
11106                                      LN0->getMemOperand());
11107     CombineTo(N, ExtLoad);
11108     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11109     AddToWorklist(ExtLoad.getNode());
11110     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11111   }
11112   // fold (sext_inreg (zextload x)) -> (sextload x) iff the load has one use
11113   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
11114       N0.hasOneUse() &&
11115       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11116       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
11117        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11118     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11119     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11120                                      LN0->getChain(),
11121                                      LN0->getBasePtr(), ExtVT,
11122                                      LN0->getMemOperand());
11123     CombineTo(N, ExtLoad);
11124     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11125     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11126   }
11127 
11128   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
11129   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
11130     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
11131                                            N0.getOperand(1), false))
11132       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
11133   }
11134 
11135   return SDValue();
11136 }
11137 
11138 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11139   SDValue N0 = N->getOperand(0);
11140   EVT VT = N->getValueType(0);
11141 
11142   // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
11143   if (N0.isUndef())
11144     return DAG.getConstant(0, SDLoc(N), VT);
11145 
11146   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11147     return Res;
11148 
11149   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11150     return SDValue(N, 0);
11151 
11152   return SDValue();
11153 }
11154 
11155 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11156   SDValue N0 = N->getOperand(0);
11157   EVT VT = N->getValueType(0);
11158 
11159   // zext_vector_inreg(undef) = 0 because the top bits will be zero.
11160   if (N0.isUndef())
11161     return DAG.getConstant(0, SDLoc(N), VT);
11162 
11163   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11164     return Res;
11165 
11166   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11167     return SDValue(N, 0);
11168 
11169   return SDValue();
11170 }
11171 
11172 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
11173   SDValue N0 = N->getOperand(0);
11174   EVT VT = N->getValueType(0);
11175   EVT SrcVT = N0.getValueType();
11176   bool isLE = DAG.getDataLayout().isLittleEndian();
11177 
11178   // noop truncate
11179   if (SrcVT == VT)
11180     return N0;
11181 
11182   // fold (truncate (truncate x)) -> (truncate x)
11183   if (N0.getOpcode() == ISD::TRUNCATE)
11184     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11185 
11186   // fold (truncate c1) -> c1
11187   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
11188     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
11189     if (C.getNode() != N)
11190       return C;
11191   }
11192 
11193   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
11194   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
11195       N0.getOpcode() == ISD::SIGN_EXTEND ||
11196       N0.getOpcode() == ISD::ANY_EXTEND) {
11197     // if the source is smaller than the dest, we still need an extend.
11198     if (N0.getOperand(0).getValueType().bitsLT(VT))
11199       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11200     // if the source is larger than the dest, then we just need the truncate.
11201     if (N0.getOperand(0).getValueType().bitsGT(VT))
11202       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11203     // if the source and dest are the same type, we can drop both the extend
11204     // and the truncate.
11205     return N0.getOperand(0);
11206   }
11207 
11208   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
11209   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
11210     return SDValue();
11211 
11212   // Fold extract-and-trunc into a narrow extract. For example:
11213   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
11214   //   i32 y = TRUNCATE(i64 x)
11215   //        -- becomes --
11216   //   v16i8 b = BITCAST (v2i64 val)
11217   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
11218   //
11219   // Note: We only run this optimization after type legalization (which often
11220   // creates this pattern) and before operation legalization, after which
11221   // we need to be more careful about the vector instructions that we generate.
11222   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11223       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
11224     EVT VecTy = N0.getOperand(0).getValueType();
11225     EVT ExTy = N0.getValueType();
11226     EVT TrTy = N->getValueType(0);
11227 
11228     unsigned NumElem = VecTy.getVectorNumElements();
11229     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
11230 
11231     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
11232     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
11233 
11234     SDValue EltNo = N0->getOperand(1);
11235     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
11236       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
11237       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
11238 
11239       SDLoc DL(N);
11240       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
11241                          DAG.getBitcast(NVT, N0.getOperand(0)),
11242                          DAG.getVectorIdxConstant(Index, DL));
11243     }
11244   }
11245 
11246   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
11247   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
11248     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
11249         TLI.isTruncateFree(SrcVT, VT)) {
11250       SDLoc SL(N0);
11251       SDValue Cond = N0.getOperand(0);
11252       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11253       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
11254       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
11255     }
11256   }
11257 
11258   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
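  // (The known-bits check below proves the shift amount K is less than the
  // narrow bit width, e.g. K < 32 when truncating to i32, so the narrowed
  // shl is well defined.)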
11259   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11260       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11261       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
11262     SDValue Amt = N0.getOperand(1);
11263     KnownBits Known = DAG.computeKnownBits(Amt);
11264     unsigned Size = VT.getScalarSizeInBits();
11265     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
11266       SDLoc SL(N);
11267       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
11268 
11269       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11270       if (AmtVT != Amt.getValueType()) {
11271         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
11272         AddToWorklist(Amt.getNode());
11273       }
11274       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
11275     }
11276   }
11277 
11278   // Attempt to pre-truncate BUILD_VECTOR sources.
11279   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
11280       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
11281       // Avoid creating illegal types if running after type legalizer.
11282       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
11283     SDLoc DL(N);
11284     EVT SVT = VT.getScalarType();
11285     SmallVector<SDValue, 8> TruncOps;
11286     for (const SDValue &Op : N0->op_values()) {
11287       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
11288       TruncOps.push_back(TruncOp);
11289     }
11290     return DAG.getBuildVector(VT, DL, TruncOps);
11291   }
11292 
11293   // Fold a series of buildvector, bitcast, and truncate if possible.
11294   // For example fold
11295   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
11296   //   (2xi32 (buildvector x, y)).
11297   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
11298       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
11299       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
11300       N0.getOperand(0).hasOneUse()) {
11301     SDValue BuildVect = N0.getOperand(0);
11302     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
11303     EVT TruncVecEltTy = VT.getVectorElementType();
11304 
11305     // Check that the element types match.
11306     if (BuildVectEltTy == TruncVecEltTy) {
11307       // Now we only need to compute the offset of the truncated elements.
11308       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
11309       unsigned TruncVecNumElts = VT.getVectorNumElements();
11310       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
11311 
11312       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
11313              "Invalid number of elements");
11314 
11315       SmallVector<SDValue, 8> Opnds;
11316       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
11317         Opnds.push_back(BuildVect.getOperand(i));
11318 
11319       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
11320     }
11321   }
11322 
11323   // See if we can simplify the input to this truncate through knowledge that
11324   // only the low bits are being used.
11325   // For example "trunc (or (shl x, 8), y)" -> trunc y
11326   // Currently we only perform this optimization on scalars because vectors
11327   // may have different active low bits.
11328   if (!VT.isVector()) {
11329     APInt Mask =
11330         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
11331     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
11332       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
11333   }
11334 
11335   // fold (truncate (load x)) -> (smaller load x)
11336   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
11337   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
11338     if (SDValue Reduced = ReduceLoadWidth(N))
11339       return Reduced;
11340 
11341     // Handle the case where the load remains an extending load even
11342     // after truncation.
11343     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
11344       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11345       if (LN0->isSimple() &&
11346           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
11347         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
11348                                          VT, LN0->getChain(), LN0->getBasePtr(),
11349                                          LN0->getMemoryVT(),
11350                                          LN0->getMemOperand());
11351         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
11352         return NewLoad;
11353       }
11354     }
11355   }
11356 
11357   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
11358   // where ... are all 'undef'.
11359   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
11360     SmallVector<EVT, 8> VTs;
11361     SDValue V;
11362     unsigned Idx = 0;
11363     unsigned NumDefs = 0;
11364 
11365     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
11366       SDValue X = N0.getOperand(i);
11367       if (!X.isUndef()) {
11368         V = X;
11369         Idx = i;
11370         NumDefs++;
11371       }
11372       // Stop if more than one members are non-undef.
11373       // Stop if more than one member is non-undef.
11374         break;
11375       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
11376                                      VT.getVectorElementType(),
11377                                      X.getValueType().getVectorNumElements()));
11378     }
11379 
11380     if (NumDefs == 0)
11381       return DAG.getUNDEF(VT);
11382 
11383     if (NumDefs == 1) {
11384       assert(V.getNode() && "The single defined operand is empty!");
11385       SmallVector<SDValue, 8> Opnds;
11386       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
11387         if (i != Idx) {
11388           Opnds.push_back(DAG.getUNDEF(VTs[i]));
11389           continue;
11390         }
11391         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
11392         AddToWorklist(NV.getNode());
11393         Opnds.push_back(NV);
11394       }
11395       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
11396     }
11397   }
11398 
11399   // Fold truncate of a bitcast of a vector to an extract of the low vector
11400   // element.
11401   //
11402   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
11403   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
11404     SDValue VecSrc = N0.getOperand(0);
11405     EVT VecSrcVT = VecSrc.getValueType();
11406     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
11407         (!LegalOperations ||
11408          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
11409       SDLoc SL(N);
11410 
11411       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
11412       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
11413                          DAG.getVectorIdxConstant(Idx, SL));
11414     }
11415   }
11416 
11417   // Simplify the operands using demanded-bits information.
11418   if (!VT.isVector() &&
11419       SimplifyDemandedBits(SDValue(N, 0)))
11420     return SDValue(N, 0);
11421 
11422   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
11423   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
11424   // When the adde's carry is not used.
11425   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
11426       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
11427       // We only do this for addcarry before operation legalization
11428       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
11429        TLI.isOperationLegal(N0.getOpcode(), VT))) {
11430     SDLoc SL(N);
11431     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11432     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11433     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
11434     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
11435   }
11436 
11437   // fold (truncate (extract_subvector(ext x))) ->
11438   //      (extract_subvector x)
11439   // TODO: This can be generalized to cover cases where the truncate and extract
11440   // do not fully cancel each other out.
11441   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
11442     SDValue N00 = N0.getOperand(0);
11443     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
11444         N00.getOpcode() == ISD::ZERO_EXTEND ||
11445         N00.getOpcode() == ISD::ANY_EXTEND) {
11446       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
11447           VT.getVectorElementType())
11448         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
11449                            N00.getOperand(0), N0.getOperand(1));
11450     }
11451   }
11452 
11453   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11454     return NewVSel;
11455 
11456   // Narrow a suitable binary operation with a non-opaque constant operand by
11457   // moving it ahead of the truncate. This is limited to pre-legalization
11458   // because targets may prefer a wider type during later combines and invert
11459   // this transform.
11460   switch (N0.getOpcode()) {
11461   case ISD::ADD:
11462   case ISD::SUB:
11463   case ISD::MUL:
11464   case ISD::AND:
11465   case ISD::OR:
11466   case ISD::XOR:
11467     if (!LegalOperations && N0.hasOneUse() &&
11468         (isConstantOrConstantVector(N0.getOperand(0), true) ||
11469          isConstantOrConstantVector(N0.getOperand(1), true))) {
11470       // TODO: We already restricted this to pre-legalization, but for vectors
11471       // we are extra cautious to not create an unsupported operation.
11472       // Target-specific changes are likely needed to avoid regressions here.
11473       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
11474         SDLoc DL(N);
11475         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
11476         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
11477         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
11478       }
11479     }
11480   }
11481 
11482   return SDValue();
11483 }
11484 
11485 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
11486   SDValue Elt = N->getOperand(i);
11487   if (Elt.getOpcode() != ISD::MERGE_VALUES)
11488     return Elt.getNode();
11489   return Elt.getOperand(Elt.getResNo()).getNode();
11490 }
11491 
11492 /// build_pair (load, load) -> load
11493 /// if load locations are consecutive.
11494 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
11495   assert(N->getOpcode() == ISD::BUILD_PAIR);
11496 
11497   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
11498   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
11499 
11500   // A BUILD_PAIR always has the least significant part in elt 0 and the
11501   // most significant part in elt 1. So when combining into one large load,
11502   // we need to consider the endianness.
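  // E.g. (illustrative): after the big-endian swap below, LD1 is always the
  // load at the lower address, making the adjacency check endian-independent.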
11503   if (DAG.getDataLayout().isBigEndian())
11504     std::swap(LD1, LD2);
11505 
11506   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
11507       LD1->getAddressSpace() != LD2->getAddressSpace())
11508     return SDValue();
11509   EVT LD1VT = LD1->getValueType(0);
11510   unsigned LD1Bytes = LD1VT.getStoreSize();
11511   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
11512       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
11513     Align Alignment = LD1->getAlign();
11514     Align NewAlign = DAG.getDataLayout().getABITypeAlign(
11515         VT.getTypeForEVT(*DAG.getContext()));
11516 
11517     if (NewAlign <= Alignment &&
11518         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
11519       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
11520                          LD1->getPointerInfo(), Alignment);
11521   }
11522 
11523   return SDValue();
11524 }
11525 
11526 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
11527   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
11528   // and Lo parts; on big-endian machines it doesn't.
11529   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
11530 }
11531 
11532 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
11533                                     const TargetLowering &TLI) {
11534   // If this is not a bitcast to an FP type or if the target doesn't have
11535   // IEEE754-compliant FP logic, we're done.
11536   EVT VT = N->getValueType(0);
11537   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
11538     return SDValue();
11539 
11540   // TODO: Handle cases where the integer constant is a different scalar
11541   // bitwidth to the FP.
11542   SDValue N0 = N->getOperand(0);
11543   EVT SourceVT = N0.getValueType();
11544   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
11545     return SDValue();
11546 
11547   unsigned FPOpcode;
11548   APInt SignMask;
11549   switch (N0.getOpcode()) {
11550   case ISD::AND:
11551     FPOpcode = ISD::FABS;
11552     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
11553     break;
11554   case ISD::XOR:
11555     FPOpcode = ISD::FNEG;
11556     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11557     break;
11558   case ISD::OR:
11559     FPOpcode = ISD::FABS;
11560     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11561     break;
11562   default:
11563     return SDValue();
11564   }
11565 
11566   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
11567   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
11568   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
11569   //   fneg (fabs X)
11570   SDValue LogicOp0 = N0.getOperand(0);
11571   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
11572   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
11573       LogicOp0.getOpcode() == ISD::BITCAST &&
11574       LogicOp0.getOperand(0).getValueType() == VT) {
11575     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
11576     NumFPLogicOpsConv++;
11577     if (N0.getOpcode() == ISD::OR)
11578       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
11579     return FPOp;
11580   }
11581 
11582   return SDValue();
11583 }
11584 
11585 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
11586   SDValue N0 = N->getOperand(0);
11587   EVT VT = N->getValueType(0);
11588 
11589   if (N0.isUndef())
11590     return DAG.getUNDEF(VT);
11591 
11592   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
11593   // Only do this before legalize types, unless both types are integer and the
11594   // scalar type is legal. Only do this before legalize ops, since the target
11595   // may be depending on the bitcast.
11596   // First check to see if this is all constant.
11597   // TODO: Support FP bitcasts after legalize types.
11598   if (VT.isVector() &&
11599       (!LegalTypes ||
11600        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
11601         TLI.isTypeLegal(VT.getVectorElementType()))) &&
11602       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
11603       cast<BuildVectorSDNode>(N0)->isConstant())
11604     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
11605                                              VT.getVectorElementType());
11606 
11607   // If the input is a constant, let getNode fold it.
11608   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
11609     // If we can't allow illegal operations, we need to check that this is just
11610     // an fp -> int or int -> fp conversion and that the resulting operation will
11611     // be legal.
11612     if (!LegalOperations ||
11613         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
11614          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
11615         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
11616          TLI.isOperationLegal(ISD::Constant, VT))) {
11617       SDValue C = DAG.getBitcast(VT, N0);
11618       if (C.getNode() != N)
11619         return C;
11620     }
11621   }
11622 
11623   // (conv (conv x, t1), t2) -> (conv x, t2)
11624   if (N0.getOpcode() == ISD::BITCAST)
11625     return DAG.getBitcast(VT, N0.getOperand(0));
11626 
11627   // fold (conv (load x)) -> (load (conv*)x)
11628   // if the resultant load doesn't need a higher alignment than the original.
11629   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11630       // Do not remove the cast if the types differ in endian layout.
11631       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
11632           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
11633       // If the load is volatile, we only want to change the load type if the
11634       // resulting load is legal. Otherwise we might increase the number of
11635       // memory accesses. We don't care if the original type was legal or not
11636       // as we assume software couldn't rely on the number of accesses of an
11637       // illegal type.
11638       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
11639        TLI.isOperationLegal(ISD::LOAD, VT))) {
11640     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11641 
11642     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
11643                                     *LN0->getMemOperand())) {
11644       SDValue Load =
11645           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
11646                       LN0->getPointerInfo(), LN0->getAlignment(),
11647                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11648       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11649       return Load;
11650     }
11651   }
11652 
11653   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
11654     return V;
11655 
11656   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
11657   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
11658   //
11659   // For ppc_fp128:
11660   // fold (bitcast (fneg x)) ->
11661   //     flipbit = signbit
11662   //     (xor (bitcast x) (build_pair flipbit, flipbit))
11663   //
11664   // fold (bitcast (fabs x)) ->
11665   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
11666   //     (xor (bitcast x) (build_pair flipbit, flipbit))
11667   // This often reduces constant pool loads.
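  // For example (editor's illustration, f32 -> i32 case): (bitcast (fneg x))
  // becomes (xor (bitcast x), 0x80000000), and (bitcast (fabs x)) becomes
  // (and (bitcast x), 0x7fffffff), turning an FP constant-pool mask into an
  // inline integer immediate.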
11668   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
11669        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
11670       N0.getNode()->hasOneUse() && VT.isInteger() &&
11671       !VT.isVector() && !N0.getValueType().isVector()) {
11672     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
11673     AddToWorklist(NewConv.getNode());
11674 
11675     SDLoc DL(N);
11676     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11677       assert(VT.getSizeInBits() == 128);
11678       SDValue SignBit = DAG.getConstant(
11679           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
11680       SDValue FlipBit;
11681       if (N0.getOpcode() == ISD::FNEG) {
11682         FlipBit = SignBit;
11683         AddToWorklist(FlipBit.getNode());
11684       } else {
11685         assert(N0.getOpcode() == ISD::FABS);
11686         SDValue Hi =
11687             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
11688                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11689                                               SDLoc(NewConv)));
11690         AddToWorklist(Hi.getNode());
11691         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
11692         AddToWorklist(FlipBit.getNode());
11693       }
11694       SDValue FlipBits =
11695           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11696       AddToWorklist(FlipBits.getNode());
11697       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
11698     }
11699     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11700     if (N0.getOpcode() == ISD::FNEG)
11701       return DAG.getNode(ISD::XOR, DL, VT,
11702                          NewConv, DAG.getConstant(SignBit, DL, VT));
11703     assert(N0.getOpcode() == ISD::FABS);
11704     return DAG.getNode(ISD::AND, DL, VT,
11705                        NewConv, DAG.getConstant(~SignBit, DL, VT));
11706   }
11707 
11708   // fold (bitconvert (fcopysign cst, x)) ->
11709   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
11710   // Note that we don't handle (copysign x, cst) because this can always be
11711   // folded to an fneg or fabs.
11712   //
11713   // For ppc_fp128:
11714   // fold (bitcast (fcopysign cst, x)) ->
11715   //     flipbit = (and (extract_element
11716   //                     (xor (bitcast cst), (bitcast x)), 0),
11717   //                    signbit)
11718   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
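  // For example (editor's illustration, f32 -> i32 case with cst == 2.0,
  // whose bits are 0x40000000): (bitcast (fcopysign 2.0, x)) becomes
  //   (or (and (bitcast x), 0x80000000), (and 0x40000000, 0x7fffffff)),
  // i.e. the sign bit of x merged into the integer bits of the constant.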
11719   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
11720       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
11721       VT.isInteger() && !VT.isVector()) {
11722     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
11723     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
11724     if (isTypeLegal(IntXVT)) {
11725       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
11726       AddToWorklist(X.getNode());
11727 
11728       // If X has a different width than the result/lhs, sext it or truncate it.
11729       unsigned VTWidth = VT.getSizeInBits();
11730       if (OrigXWidth < VTWidth) {
11731         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
11732         AddToWorklist(X.getNode());
11733       } else if (OrigXWidth > VTWidth) {
11734         // To get the sign bit in the right place, we have to shift it right
11735         // before truncating.
11736         SDLoc DL(X);
11737         X = DAG.getNode(ISD::SRL, DL,
11738                         X.getValueType(), X,
11739                         DAG.getConstant(OrigXWidth-VTWidth, DL,
11740                                         X.getValueType()));
11741         AddToWorklist(X.getNode());
11742         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
11743         AddToWorklist(X.getNode());
11744       }
11745 
11746       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11747         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
11748         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11749         AddToWorklist(Cst.getNode());
11750         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
11751         AddToWorklist(X.getNode());
11752         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
11753         AddToWorklist(XorResult.getNode());
11754         SDValue XorResult64 = DAG.getNode(
11755             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
11756             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11757                                   SDLoc(XorResult)));
11758         AddToWorklist(XorResult64.getNode());
11759         SDValue FlipBit =
11760             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
11761                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
11762         AddToWorklist(FlipBit.getNode());
11763         SDValue FlipBits =
11764             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11765         AddToWorklist(FlipBits.getNode());
11766         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
11767       }
11768       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11769       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
11770                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
11771       AddToWorklist(X.getNode());
11772 
11773       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11774       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
11775                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
11776       AddToWorklist(Cst.getNode());
11777 
11778       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11779     }
11780   }
11781 
11782   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
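  // (Editor's illustration: an i64 built as a build_pair of two adjacent i32
  // loads can become a single i64 load, subject to the address and legality
  // checks inside CombineConsecutiveLoads.)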
11783   if (N0.getOpcode() == ISD::BUILD_PAIR)
11784     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11785       return CombineLD;
11786 
11787   // Remove double bitcasts from shuffles - this is often a legacy of
11788   // XformToShuffleWithZero being used to combine bitmaskings (of
11789   // float vectors bitcast to integer vectors) into shuffles.
11790   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
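  // Worked example (editor's illustration): bitcasting a v2i64 shuffle with
  // mask <1,0> to v4i32 gives MaskScale == 2, so the mask is widened to
  // <2,3,0,1> and the shuffle is rebuilt directly on the v4i32 operands.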
11791   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11792       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11793       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
11794       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
11795     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11796 
11797     // If an operand is a bitcast, peek through it if it casts the original VT.
11798     // If an operand is a constant, just bitcast it back to the original VT.
11799     auto PeekThroughBitcast = [&](SDValue Op) {
11800       if (Op.getOpcode() == ISD::BITCAST &&
11801           Op.getOperand(0).getValueType() == VT)
11802         return SDValue(Op.getOperand(0));
11803       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11804           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
11805         return DAG.getBitcast(VT, Op);
11806       return SDValue();
11807     };
11808 
11809     // FIXME: If either input vector is bitcast, try to convert the shuffle to
11810     // the result type of this bitcast. This would eliminate at least one
11811     // bitcast. See the transform in InstCombine.
11812     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11813     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
11814     if (!(SV0 && SV1))
11815       return SDValue();
11816 
11817     int MaskScale =
11818         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
11819     SmallVector<int, 8> NewMask;
11820     for (int M : SVN->getMask())
11821       for (int i = 0; i != MaskScale; ++i)
11822         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11823 
11824     SDValue LegalShuffle =
11825         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
11826     if (LegalShuffle)
11827       return LegalShuffle;
11828   }
11829 
11830   return SDValue();
11831 }
11832 
11833 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11834   EVT VT = N->getValueType(0);
11835   return CombineConsecutiveLoads(N, VT);
11836 }
11837 
11838 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
11839   SDValue N0 = N->getOperand(0);
11840 
11841   // (freeze (freeze x)) -> (freeze x)
11842   if (N0.getOpcode() == ISD::FREEZE)
11843     return N0;
11844 
11845   // If the input is a constant, return it.
11846   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
11847     return N0;
11848 
11849   return SDValue();
11850 }
11851 
11852 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11853 /// operands. DstEltVT indicates the destination element value type.
11854 SDValue DAGCombiner::
11855 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11856   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11857 
11858   // If this is already the right type, we're done.
11859   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11860 
11861   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11862   unsigned DstBitSize = DstEltVT.getSizeInBits();
11863 
11864   // If this is a conversion of N elements of one type to N elements of another
11865   // type, convert each element.  This handles FP<->INT cases.
11866   if (SrcBitSize == DstBitSize) {
11867     SmallVector<SDValue, 8> Ops;
11868     for (SDValue Op : BV->op_values()) {
11869       // If the vector element type is not legal, the BUILD_VECTOR operands
11870       // are promoted and implicitly truncated.  Make that explicit here.
11871       if (Op.getValueType() != SrcEltVT)
11872         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11873       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11874       AddToWorklist(Ops.back().getNode());
11875     }
11876     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11877                               BV->getValueType(0).getVectorNumElements());
11878     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
11879   }
11880 
11881   // Otherwise, we're growing or shrinking the elements.  To avoid having to
11882   // handle annoying details of growing/shrinking FP values, we convert them to
11883   // int first.
11884   if (SrcEltVT.isFloatingPoint()) {
11885     // Convert the input float vector to an int vector whose elements are the
11886     // same size.
11887     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11888     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11889     SrcEltVT = IntVT;
11890   }
11891 
11892   // Now we know the input is an integer vector.  If the output is an FP type,
11893   // convert to same-sized integer elements first, then bitcast those to FP.
11894   if (DstEltVT.isFloatingPoint()) {
11895     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11896     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11897 
11898     // Next, convert to FP elements of the same size.
11899     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11900   }
11901 
11902   SDLoc DL(BV);
11903 
11904   // Okay, we know the src/dst types are both integers of differing sizes.
11905   // Handle growing first.
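  // Worked example (editor's illustration): growing v4i16
  // <0x1111,0x2222,0x3333,0x4444> to v2i32 yields <0x22221111,0x44443333>
  // on a little-endian target and <0x11112222,0x33334444> on a big-endian
  // one.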
11906   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
11907   if (SrcBitSize < DstBitSize) {
11908     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
11909 
11910     SmallVector<SDValue, 8> Ops;
11911     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11912          i += NumInputsPerOutput) {
11913       bool isLE = DAG.getDataLayout().isLittleEndian();
11914       APInt NewBits = APInt(DstBitSize, 0);
11915       bool EltIsUndef = true;
11916       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11917         // Shift the previously computed bits over.
11918         NewBits <<= SrcBitSize;
11919         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
11920         if (Op.isUndef()) continue;
11921         EltIsUndef = false;
11922 
11923         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11924                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
11925       }
11926 
11927       if (EltIsUndef)
11928         Ops.push_back(DAG.getUNDEF(DstEltVT));
11929       else
11930         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
11931     }
11932 
11933     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11934     return DAG.getBuildVector(VT, DL, Ops);
11935   }
11936 
11937   // Finally, this must be the case where we are shrinking elements: each input
11938   // turns into multiple outputs.
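  // Worked example (editor's illustration): shrinking the v2i32 element
  // 0xAAAABBBB to v4i16 pieces emits the low piece first, giving
  // <0xBBBB,0xAAAA,...>; the std::reverse below restores <0xAAAA,0xBBBB,...>
  // ordering for big-endian targets.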
11939   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11940   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11941                             NumOutputsPerInput*BV->getNumOperands());
11942   SmallVector<SDValue, 8> Ops;
11943 
11944   for (const SDValue &Op : BV->op_values()) {
11945     if (Op.isUndef()) {
11946       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11947       continue;
11948     }
11949 
11950     APInt OpVal = cast<ConstantSDNode>(Op)->
11951                   getAPIntValue().zextOrTrunc(SrcBitSize);
11952 
11953     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11954       APInt ThisVal = OpVal.trunc(DstBitSize);
11955       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11956       OpVal.lshrInPlace(DstBitSize);
11957     }
11958 
11959     // For big endian targets, swap the order of the pieces of each element.
11960     if (DAG.getDataLayout().isBigEndian())
11961       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11962   }
11963 
11964   return DAG.getBuildVector(VT, DL, Ops);
11965 }
11966 
11967 static bool isContractable(SDNode *N) {
11968   SDNodeFlags F = N->getFlags();
11969   return F.hasAllowContract() || F.hasAllowReassociation();
11970 }
11971 
11972 /// Try to perform FMA combining on a given FADD node.
11973 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11974   SDValue N0 = N->getOperand(0);
11975   SDValue N1 = N->getOperand(1);
11976   EVT VT = N->getValueType(0);
11977   SDLoc SL(N);
11978 
11979   const TargetOptions &Options = DAG.getTarget().Options;
11980 
11981   // Floating-point multiply-add with intermediate rounding.
11982   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
11983 
11984   // Floating-point multiply-add without intermediate rounding.
11985   bool HasFMA =
11986       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
11987       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11988 
11989   // No valid opcode, do not combine.
11990   if (!HasFMAD && !HasFMA)
11991     return SDValue();
11992 
11993   SDNodeFlags Flags = N->getFlags();
11994   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11995   bool CanReassociate =
11996       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
11997   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11998                               CanFuse || HasFMAD);
11999   // If the addition is not contractable, do not combine.
12000   if (!AllowFusionGlobally && !isContractable(N))
12001     return SDValue();
12002 
12003   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12004     return SDValue();
12005 
12006   // Always prefer FMAD to FMA for precision.
12007   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12008   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12009 
12010   // Is the node an FMUL and contractable either due to global flags or
12011   // SDNodeFlags?
12012   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12013     if (N.getOpcode() != ISD::FMUL)
12014       return false;
12015     return AllowFusionGlobally || isContractable(N.getNode());
12016   };
12017   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12018   // prefer to fold the multiply with fewer uses.
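  // (Editor's note: a multiply with other uses must stay live anyway, so
  // fusing the one with fewer uses is more likely to delete a node.)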
12019   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12020     if (N0.getNode()->use_size() > N1.getNode()->use_size())
12021       std::swap(N0, N1);
12022   }
12023 
12024   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
12025   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
12026     return DAG.getNode(PreferredFusedOpcode, SL, VT,
12027                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
12028   }
12029 
12030   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
12031   // Note: Commutes FADD operands.
12032   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
12033     return DAG.getNode(PreferredFusedOpcode, SL, VT,
12034                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
12035   }
12036 
12037   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
12038   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
12039   // This requires reassociation because it changes the order of operations.
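  // (Editor's note: this rewrites ((A*B + C*D) + E) as (A*B + (C*D + E)),
  // which is only sound when reassociation is allowed.)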
12040   SDValue FMA, E;
12041   if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
12042       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
12043       N0.getOperand(2).hasOneUse()) {
12044     FMA = N0;
12045     E = N1;
12046   } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
12047              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
12048              N1.getOperand(2).hasOneUse()) {
12049     FMA = N1;
12050     E = N0;
12051   }
12052   if (FMA && E) {
12053     SDValue A = FMA.getOperand(0);
12054     SDValue B = FMA.getOperand(1);
12055     SDValue C = FMA.getOperand(2).getOperand(0);
12056     SDValue D = FMA.getOperand(2).getOperand(1);
12057     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
12058     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
12059   }
12060 
12061   // Look through FP_EXTEND nodes to do more combining.
12062 
12063   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
12064   if (N0.getOpcode() == ISD::FP_EXTEND) {
12065     SDValue N00 = N0.getOperand(0);
12066     if (isContractableFMUL(N00) &&
12067         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12068                             N00.getValueType())) {
12069       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12070                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12071                                      N00.getOperand(0)),
12072                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12073                                      N00.getOperand(1)), N1, Flags);
12074     }
12075   }
12076 
12077   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
12078   // Note: Commutes FADD operands.
12079   if (N1.getOpcode() == ISD::FP_EXTEND) {
12080     SDValue N10 = N1.getOperand(0);
12081     if (isContractableFMUL(N10) &&
12082         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12083                             N10.getValueType())) {
12084       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12085                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12086                                      N10.getOperand(0)),
12087                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12088                                      N10.getOperand(1)), N0, Flags);
12089     }
12090   }
12091 
12092   // More folding opportunities when the target permits.
12093   if (Aggressive) {
12094     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
12095     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
12096     auto FoldFAddFMAFPExtFMul = [&] (
12097       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
12098       SDNodeFlags Flags) {
12099       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
12100                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12101                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12102                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12103                                      Z, Flags), Flags);
12104     };
12105     if (N0.getOpcode() == PreferredFusedOpcode) {
12106       SDValue N02 = N0.getOperand(2);
12107       if (N02.getOpcode() == ISD::FP_EXTEND) {
12108         SDValue N020 = N02.getOperand(0);
12109         if (isContractableFMUL(N020) &&
12110             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12111                                 N020.getValueType())) {
12112           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
12113                                       N020.getOperand(0), N020.getOperand(1),
12114                                       N1, Flags);
12115         }
12116       }
12117     }
12118 
12119     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
12120     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
12121     // FIXME: This turns two single-precision and one double-precision
12122     // operation into two double-precision operations, which might not be
12123     // interesting for all targets, especially GPUs.
12124     auto FoldFAddFPExtFMAFMul = [&] (
12125       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
12126       SDNodeFlags Flags) {
12127       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12128                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
12129                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
12130                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12131                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12132                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12133                                      Z, Flags), Flags);
12134     };
12135     if (N0.getOpcode() == ISD::FP_EXTEND) {
12136       SDValue N00 = N0.getOperand(0);
12137       if (N00.getOpcode() == PreferredFusedOpcode) {
12138         SDValue N002 = N00.getOperand(2);
12139         if (isContractableFMUL(N002) &&
12140             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12141                                 N00.getValueType())) {
12142           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
12143                                       N002.getOperand(0), N002.getOperand(1),
12144                                       N1, Flags);
12145         }
12146       }
12147     }
12148 
12149     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
12150     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
12151     if (N1.getOpcode() == PreferredFusedOpcode) {
12152       SDValue N12 = N1.getOperand(2);
12153       if (N12.getOpcode() == ISD::FP_EXTEND) {
12154         SDValue N120 = N12.getOperand(0);
12155         if (isContractableFMUL(N120) &&
12156             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12157                                 N120.getValueType())) {
12158           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
12159                                       N120.getOperand(0), N120.getOperand(1),
12160                                       N0, Flags);
12161         }
12162       }
12163     }
12164 
12165     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
12166     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
12167     // FIXME: This turns two single-precision and one double-precision
12168     // operation into two double-precision operations, which might not be
12169     // interesting for all targets, especially GPUs.
12170     if (N1.getOpcode() == ISD::FP_EXTEND) {
12171       SDValue N10 = N1.getOperand(0);
12172       if (N10.getOpcode() == PreferredFusedOpcode) {
12173         SDValue N102 = N10.getOperand(2);
12174         if (isContractableFMUL(N102) &&
12175             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12176                                 N10.getValueType())) {
12177           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
12178                                       N102.getOperand(0), N102.getOperand(1),
12179                                       N0, Flags);
12180         }
12181       }
12182     }
12183   }
12184 
12185   return SDValue();
12186 }
12187 
12188 /// Try to perform FMA combining on a given FSUB node.
12189 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
12190   SDValue N0 = N->getOperand(0);
12191   SDValue N1 = N->getOperand(1);
12192   EVT VT = N->getValueType(0);
12193   SDLoc SL(N);
12194 
12195   const TargetOptions &Options = DAG.getTarget().Options;
12196   // Floating-point multiply-add with intermediate rounding.
12197   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12198 
12199   // Floating-point multiply-add without intermediate rounding.
12200   bool HasFMA =
12201       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12202       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12203 
12204   // No valid opcode, do not combine.
12205   if (!HasFMAD && !HasFMA)
12206     return SDValue();
12207 
12208   const SDNodeFlags Flags = N->getFlags();
12209   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12210   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12211                               CanFuse || HasFMAD);
12212 
12213   // If the subtraction is not contractable, do not combine.
12214   if (!AllowFusionGlobally && !isContractable(N))
12215     return SDValue();
12216 
12217   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12218     return SDValue();
12219 
12220   // Always prefer FMAD to FMA for precision.
12221   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12222   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12223   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
12224 
12225   // Is the node an FMUL and contractable either due to global flags or
12226   // SDNodeFlags?
12227   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12228     if (N.getOpcode() != ISD::FMUL)
12229       return false;
12230     return AllowFusionGlobally || isContractable(N.getNode());
12231   };
12232 
12233   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12234   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
12235     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
12236       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
12237                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
12238                          Flags);
12239     }
12240     return SDValue();
12241   };
12242 
12243   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12244   // Note: Commutes FSUB operands.
12245   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
12246     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
12247       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12248                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
12249                          YZ.getOperand(1), X, Flags);
12250     }
12251     return SDValue();
12252   };
12253 
12254   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
12255   // prefer to fold the multiply with fewer uses.
12256   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
12257       (N0.getNode()->use_size() > N1.getNode()->use_size())) {
12258     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
12259     if (SDValue V = tryToFoldXSubYZ(N0, N1))
12260       return V;
12261     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
12262     if (SDValue V = tryToFoldXYSubZ(N0, N1))
12263       return V;
12264   } else {
12265     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12266     if (SDValue V = tryToFoldXYSubZ(N0, N1))
12267       return V;
12268     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12269     if (SDValue V = tryToFoldXSubYZ(N0, N1))
12270       return V;
12271   }
12272 
12273   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
12274   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
12275       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
12276     SDValue N00 = N0.getOperand(0).getOperand(0);
12277     SDValue N01 = N0.getOperand(0).getOperand(1);
12278     return DAG.getNode(PreferredFusedOpcode, SL, VT,
12279                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
12280                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
12281   }
12282 
12283   // Look through FP_EXTEND nodes to do more combining.
12284 
12285   // fold (fsub (fpext (fmul x, y)), z)
12286   //   -> (fma (fpext x), (fpext y), (fneg z))
12287   if (N0.getOpcode() == ISD::FP_EXTEND) {
12288     SDValue N00 = N0.getOperand(0);
12289     if (isContractableFMUL(N00) &&
12290         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12291                             N00.getValueType())) {
12292       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12293                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12294                                      N00.getOperand(0)),
12295                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12296                                      N00.getOperand(1)),
12297                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
12298     }
12299   }
12300 
12301   // fold (fsub x, (fpext (fmul y, z)))
12302   //   -> (fma (fneg (fpext y)), (fpext z), x)
12303   // Note: Commutes FSUB operands.
12304   if (N1.getOpcode() == ISD::FP_EXTEND) {
12305     SDValue N10 = N1.getOperand(0);
12306     if (isContractableFMUL(N10) &&
12307         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12308                             N10.getValueType())) {
12309       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12310                          DAG.getNode(ISD::FNEG, SL, VT,
12311                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
12312                                                  N10.getOperand(0))),
12313                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12314                                      N10.getOperand(1)),
12315                          N0, Flags);
12316     }
12317   }
12318 
12319   // fold (fsub (fpext (fneg (fmul x, y))), z)
12320   //   -> (fneg (fma (fpext x), (fpext y), z))
12321   // Note: This could be removed with appropriate canonicalization of the
12322   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
12323   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
12324   // us from implementing the canonicalization in visitFSUB.
12325   if (N0.getOpcode() == ISD::FP_EXTEND) {
12326     SDValue N00 = N0.getOperand(0);
12327     if (N00.getOpcode() == ISD::FNEG) {
12328       SDValue N000 = N00.getOperand(0);
12329       if (isContractableFMUL(N000) &&
12330           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12331                               N00.getValueType())) {
12332         return DAG.getNode(ISD::FNEG, SL, VT,
12333                            DAG.getNode(PreferredFusedOpcode, SL, VT,
12334                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12335                                                    N000.getOperand(0)),
12336                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12337                                                    N000.getOperand(1)),
12338                                        N1, Flags));
12339       }
12340     }
12341   }
12342 
12343   // fold (fsub (fneg (fpext (fmul x, y))), z)
12344   //   -> (fneg (fma (fpext x), (fpext y), z))
12345   // Note: This could be removed with appropriate canonicalization of the
12346   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
12347   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
12348   // us from implementing the canonicalization in visitFSUB.
12349   if (N0.getOpcode() == ISD::FNEG) {
12350     SDValue N00 = N0.getOperand(0);
12351     if (N00.getOpcode() == ISD::FP_EXTEND) {
12352       SDValue N000 = N00.getOperand(0);
12353       if (isContractableFMUL(N000) &&
12354           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12355                               N000.getValueType())) {
12356         return DAG.getNode(ISD::FNEG, SL, VT,
12357                            DAG.getNode(PreferredFusedOpcode, SL, VT,
12358                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12359                                                    N000.getOperand(0)),
12360                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12361                                                    N000.getOperand(1)),
12362                                        N1, Flags));
12363       }
12364     }
12365   }
12366 
12367   // More folding opportunities when the target permits.
12368   if (Aggressive) {
12369     // fold (fsub (fma x, y, (fmul u, v)), z)
12370     //   -> (fma x, y, (fma u, v, (fneg z)))
12371     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
12372         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
12373         N0.getOperand(2)->hasOneUse()) {
12374       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12375                          N0.getOperand(0), N0.getOperand(1),
12376                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12377                                      N0.getOperand(2).getOperand(0),
12378                                      N0.getOperand(2).getOperand(1),
12379                                      DAG.getNode(ISD::FNEG, SL, VT,
12380                                                  N1), Flags), Flags);
12381     }
12382 
12383     // fold (fsub x, (fma y, z, (fmul u, v)))
12384     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
12385     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
12386         isContractableFMUL(N1.getOperand(2)) &&
12387         N1->hasOneUse() && NoSignedZero) {
12388       SDValue N20 = N1.getOperand(2).getOperand(0);
12389       SDValue N21 = N1.getOperand(2).getOperand(1);
12390       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12391                          DAG.getNode(ISD::FNEG, SL, VT,
12392                                      N1.getOperand(0)),
12393                          N1.getOperand(1),
12394                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12395                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
12396                                      N21, N0, Flags), Flags);
12397     }
12398 
12399 
12400     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
12401     //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
12402     if (N0.getOpcode() == PreferredFusedOpcode &&
12403         N0->hasOneUse()) {
12404       SDValue N02 = N0.getOperand(2);
12405       if (N02.getOpcode() == ISD::FP_EXTEND) {
12406         SDValue N020 = N02.getOperand(0);
12407         if (isContractableFMUL(N020) &&
12408             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12409                                 N020.getValueType())) {
12410           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12411                              N0.getOperand(0), N0.getOperand(1),
12412                              DAG.getNode(PreferredFusedOpcode, SL, VT,
12413                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12414                                                      N020.getOperand(0)),
12415                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12416                                                      N020.getOperand(1)),
12417                                          DAG.getNode(ISD::FNEG, SL, VT,
12418                                                      N1), Flags), Flags);
12419         }
12420       }
12421     }
12422 
12423     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
12424     //   -> (fma (fpext x), (fpext y),
12425     //           (fma (fpext u), (fpext v), (fneg z)))
12426     // FIXME: This turns two single-precision and one double-precision
12427     // operation into two double-precision operations, which might not be
12428     // interesting for all targets, especially GPUs.
12429     if (N0.getOpcode() == ISD::FP_EXTEND) {
12430       SDValue N00 = N0.getOperand(0);
12431       if (N00.getOpcode() == PreferredFusedOpcode) {
12432         SDValue N002 = N00.getOperand(2);
12433         if (isContractableFMUL(N002) &&
12434             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12435                                 N00.getValueType())) {
12436           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12437                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
12438                                          N00.getOperand(0)),
12439                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
12440                                          N00.getOperand(1)),
12441                              DAG.getNode(PreferredFusedOpcode, SL, VT,
12442                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12443                                                      N002.getOperand(0)),
12444                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
12445                                                      N002.getOperand(1)),
12446                                          DAG.getNode(ISD::FNEG, SL, VT,
12447                                                      N1), Flags), Flags);
12448         }
12449       }
12450     }
12451 
12452     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
12453     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
12454     if (N1.getOpcode() == PreferredFusedOpcode &&
12455         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
12456         N1->hasOneUse()) {
12457       SDValue N120 = N1.getOperand(2).getOperand(0);
12458       if (isContractableFMUL(N120) &&
12459           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12460                               N120.getValueType())) {
12461         SDValue N1200 = N120.getOperand(0);
12462         SDValue N1201 = N120.getOperand(1);
12463         return DAG.getNode(PreferredFusedOpcode, SL, VT,
12464                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
12465                            N1.getOperand(1),
12466                            DAG.getNode(PreferredFusedOpcode, SL, VT,
12467                                        DAG.getNode(ISD::FNEG, SL, VT,
12468                                                    DAG.getNode(ISD::FP_EXTEND, SL,
12469                                                                VT, N1200)),
12470                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12471                                                    N1201),
12472                                        N0, Flags), Flags);
12473       }
12474     }
12475 
12476     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
12477     //   -> (fma (fneg (fpext y)), (fpext z),
12478     //           (fma (fneg (fpext u)), (fpext v), x))
12479     // FIXME: This turns two single-precision and one double-precision
12480     // operation into two double-precision operations, which might not be
12481     // interesting for all targets, especially GPUs.
12482     if (N1.getOpcode() == ISD::FP_EXTEND &&
12483         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
12484       SDValue CvtSrc = N1.getOperand(0);
12485       SDValue N100 = CvtSrc.getOperand(0);
12486       SDValue N101 = CvtSrc.getOperand(1);
12487       SDValue N102 = CvtSrc.getOperand(2);
12488       if (isContractableFMUL(N102) &&
12489           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12490                               CvtSrc.getValueType())) {
12491         SDValue N1020 = N102.getOperand(0);
12492         SDValue N1021 = N102.getOperand(1);
12493         return DAG.getNode(PreferredFusedOpcode, SL, VT,
12494                            DAG.getNode(ISD::FNEG, SL, VT,
12495                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12496                                                    N100)),
12497                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
12498                            DAG.getNode(PreferredFusedOpcode, SL, VT,
12499                                        DAG.getNode(ISD::FNEG, SL, VT,
12500                                                    DAG.getNode(ISD::FP_EXTEND, SL,
12501                                                                VT, N1020)),
12502                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
12503                                                    N1021),
12504                                        N0, Flags), Flags);
12505       }
12506     }
12507   }
12508 
12509   return SDValue();
12510 }
12511 
12512 /// Try to perform FMA combining on a given FMUL node based on the distributive
12513 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
12514 /// subtraction instead of addition).
12515 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
12516   SDValue N0 = N->getOperand(0);
12517   SDValue N1 = N->getOperand(1);
12518   EVT VT = N->getValueType(0);
12519   SDLoc SL(N);
12520   const SDNodeFlags Flags = N->getFlags();
12521 
12522   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
12523 
12524   const TargetOptions &Options = DAG.getTarget().Options;
12525 
12526   // The transforms below are incorrect when x == 0 and y == inf, because the
12527   // intermediate multiplication produces a nan.
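  // (Editor's example: with x == 0.0 and y == inf, (x + 1.0) * y == inf,
  // but the transformed (fma x, y, y) evaluates 0.0 * inf + inf == nan.)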
12528   if (!Options.NoInfsFPMath)
12529     return SDValue();
12530 
12531   // Floating-point multiply-add without intermediate rounding.
12532   bool HasFMA =
12533       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
12534       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12535       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12536 
12537   // Floating-point multiply-add with intermediate rounding. This can result
12538   // in a less precise result due to the changed rounding order.
12539   bool HasFMAD = Options.UnsafeFPMath &&
12540                  (LegalOperations && TLI.isFMADLegal(DAG, N));
12541 
12542   // No valid opcode, do not combine.
12543   if (!HasFMAD && !HasFMA)
12544     return SDValue();
12545 
12546   // Always prefer FMAD to FMA for precision.
12547   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12548   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12549 
12550   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
12551   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
12552   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12553     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
12554       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
12555         if (C->isExactlyValue(+1.0))
12556           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12557                              Y, Flags);
12558         if (C->isExactlyValue(-1.0))
12559           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12560                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12561       }
12562     }
12563     return SDValue();
12564   };
12565 
12566   if (SDValue FMA = FuseFADD(N0, N1, Flags))
12567     return FMA;
12568   if (SDValue FMA = FuseFADD(N1, N0, Flags))
12569     return FMA;
12570 
12571   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
12572   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
12573   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
12574   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
12575   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12576     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
12577       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
12578         if (C0->isExactlyValue(+1.0))
12579           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12580                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12581                              Y, Flags);
12582         if (C0->isExactlyValue(-1.0))
12583           return DAG.getNode(PreferredFusedOpcode, SL, VT,
12584                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12585                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12586       }
12587       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
12588         if (C1->isExactlyValue(+1.0))
12589           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12590                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12591         if (C1->isExactlyValue(-1.0))
12592           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12593                              Y, Flags);
12594       }
12595     }
12596     return SDValue();
12597   };
12598 
12599   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
12600     return FMA;
12601   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
12602     return FMA;
12603 
12604   return SDValue();
12605 }
12606 
12607 SDValue DAGCombiner::visitFADD(SDNode *N) {
12608   SDValue N0 = N->getOperand(0);
12609   SDValue N1 = N->getOperand(1);
12610   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12611   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12612   EVT VT = N->getValueType(0);
12613   SDLoc DL(N);
12614   const TargetOptions &Options = DAG.getTarget().Options;
12615   const SDNodeFlags Flags = N->getFlags();
12616 
12617   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12618     return R;
12619 
12620   // fold vector ops
12621   if (VT.isVector())
12622     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12623       return FoldedVOp;
12624 
12625   // fold (fadd c1, c2) -> c1 + c2
12626   if (N0CFP && N1CFP)
12627     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
12628 
12629   // canonicalize constant to RHS
12630   if (N0CFP && !N1CFP)
12631     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
12632 
12633   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
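  // (Editor's note: x + (-0.0) == x for every x, including x == -0.0,
  // whereas -0.0 + (+0.0) == +0.0, so the +0.0 case is only valid under
  // no-signed-zeros.)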
12634   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
12635   if (N1C && N1C->isZero())
12636     if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
12637       return N0;
12638 
12639   if (SDValue NewSel = foldBinOpIntoSelect(N))
12640     return NewSel;
12641 
12642   // fold (fadd A, (fneg B)) -> (fsub A, B)
12643   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
12644     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
12645             N1, DAG, LegalOperations, ForCodeSize))
12646       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);
12647 
12648   // fold (fadd (fneg A), B) -> (fsub B, A)
12649   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
12650     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
12651             N0, DAG, LegalOperations, ForCodeSize))
12652       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);
12653 
12654   auto isFMulNegTwo = [](SDValue FMul) {
12655     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
12656       return false;
12657     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
12658     return C && C->isExactlyValue(-2.0);
12659   };
12660 
12661   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
12662   if (isFMulNegTwo(N0)) {
12663     SDValue B = N0.getOperand(0);
12664     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12665     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
12666   }
12667   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
12668   if (isFMulNegTwo(N1)) {
12669     SDValue B = N1.getOperand(0);
12670     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12671     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
12672   }
12673 
12674   // No FP constant should be created after legalization, as the Instruction
12675   // Selection pass has a hard time dealing with FP constants.
12676   bool AllowNewConst = (Level < AfterLegalizeDAG);
12677 
12678   // If nnan is enabled, fold lots of things.
12679   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
12680     // If allowed, fold (fadd (fneg x), x) -> 0.0
12681     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
12682       return DAG.getConstantFP(0.0, DL, VT);
12683 
12684     // If allowed, fold (fadd x, (fneg x)) -> 0.0
12685     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
12686       return DAG.getConstantFP(0.0, DL, VT);
12687   }
12688 
12689   // If 'unsafe math' or both reassoc and nsz are set, fold lots of things.
12690   // TODO: break out portions of the transformations below for which Unsafe is
12691   //       considered and which do not require both nsz and reassoc
12692   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12693        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12694       AllowNewConst) {
12695     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
12696     if (N1CFP && N0.getOpcode() == ISD::FADD &&
12697         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12698       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
12699       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
12700     }
12701 
12702     // We can fold chains of FADD's of the same value into multiplications.
12703     // This transform is not safe in general because we are reducing the number
12704     // of rounding steps.
12705     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
12706       if (N0.getOpcode() == ISD::FMUL) {
12707         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12708         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
12709 
12710         // (fadd (fmul x, c), x) -> (fmul x, c+1)
12711         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
12712           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12713                                        DAG.getConstantFP(1.0, DL, VT), Flags);
12714           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
12715         }
12716 
12717         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
12718         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
12719             N1.getOperand(0) == N1.getOperand(1) &&
12720             N0.getOperand(0) == N1.getOperand(0)) {
12721           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12722                                        DAG.getConstantFP(2.0, DL, VT), Flags);
12723           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
12724         }
12725       }
12726 
12727       if (N1.getOpcode() == ISD::FMUL) {
12728         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12729         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
12730 
12731         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
12732         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
12733           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12734                                        DAG.getConstantFP(1.0, DL, VT), Flags);
12735           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
12736         }
12737 
12738         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
12739         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
12740             N0.getOperand(0) == N0.getOperand(1) &&
12741             N1.getOperand(0) == N0.getOperand(0)) {
12742           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12743                                        DAG.getConstantFP(2.0, DL, VT), Flags);
12744           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
12745         }
12746       }
12747 
12748       if (N0.getOpcode() == ISD::FADD) {
12749         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12750         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
12751         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
12752             (N0.getOperand(0) == N1)) {
12753           return DAG.getNode(ISD::FMUL, DL, VT,
12754                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
12755         }
12756       }
12757 
12758       if (N1.getOpcode() == ISD::FADD) {
12759         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12760         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
12761         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
12762             N1.getOperand(0) == N0) {
12763           return DAG.getNode(ISD::FMUL, DL, VT,
12764                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
12765         }
12766       }
12767 
12768       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
12769       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
12770           N0.getOperand(0) == N0.getOperand(1) &&
12771           N1.getOperand(0) == N1.getOperand(1) &&
12772           N0.getOperand(0) == N1.getOperand(0)) {
12773         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
12774                            DAG.getConstantFP(4.0, DL, VT), Flags);
12775       }
12776     }
12777   } // enable-unsafe-fp-math
12778 
12779   // FADD -> FMA combines:
12780   if (SDValue Fused = visitFADDForFMACombine(N)) {
12781     AddToWorklist(Fused.getNode());
12782     return Fused;
12783   }
12784   return SDValue();
12785 }
12786 
12787 SDValue DAGCombiner::visitFSUB(SDNode *N) {
12788   SDValue N0 = N->getOperand(0);
12789   SDValue N1 = N->getOperand(1);
12790   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12791   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12792   EVT VT = N->getValueType(0);
12793   SDLoc DL(N);
12794   const TargetOptions &Options = DAG.getTarget().Options;
12795   const SDNodeFlags Flags = N->getFlags();
12796 
12797   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12798     return R;
12799 
12800   // fold vector ops
12801   if (VT.isVector())
12802     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12803       return FoldedVOp;
12804 
12805   // fold (fsub c1, c2) -> c1-c2
12806   if (N0CFP && N1CFP)
12807     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12808 
12809   if (SDValue NewSel = foldBinOpIntoSelect(N))
12810     return NewSel;
12811 
12812   // (fsub A, 0) -> A
12813   if (N1CFP && N1CFP->isZero()) {
12814     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
12815         Flags.hasNoSignedZeros()) {
12816       return N0;
12817     }
12818   }
12819 
12820   if (N0 == N1) {
12821     // (fsub x, x) -> 0.0
12822     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12823       return DAG.getConstantFP(0.0f, DL, VT);
12824   }
12825 
12826   // (fsub -0.0, N1) -> -N1
12827   // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12828   //       FSUB does not specify the sign bit of a NaN. Also note that for
12829   //       the same reason, the inverse transform is not safe, unless fast math
12830   //       flags are in play.
12831   if (N0CFP && N0CFP->isZero()) {
12832     if (N0CFP->isNegative() ||
12833         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
12834       if (SDValue NegN1 =
12835               TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
12836         return NegN1;
12837       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12838         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12839     }
12840   }
12841 
12842   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12843        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12844       N1.getOpcode() == ISD::FADD) {
12845     // X - (X + Y) -> -Y
12846     if (N0 == N1->getOperand(0))
12847       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12848     // X - (Y + X) -> -Y
12849     if (N0 == N1->getOperand(1))
12850       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12851   }
12852 
12853   // fold (fsub A, (fneg B)) -> (fadd A, B)
12854   if (SDValue NegN1 =
12855           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
12856     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
12857 
12858   // FSUB -> FMA combines:
12859   if (SDValue Fused = visitFSUBForFMACombine(N)) {
12860     AddToWorklist(Fused.getNode());
12861     return Fused;
12862   }
12863 
12864   return SDValue();
12865 }
12866 
12867 SDValue DAGCombiner::visitFMUL(SDNode *N) {
12868   SDValue N0 = N->getOperand(0);
12869   SDValue N1 = N->getOperand(1);
12870   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12871   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12872   EVT VT = N->getValueType(0);
12873   SDLoc DL(N);
12874   const TargetOptions &Options = DAG.getTarget().Options;
12875   const SDNodeFlags Flags = N->getFlags();
12876 
12877   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12878     return R;
12879 
12880   // fold vector ops
12881   if (VT.isVector()) {
12882     // This just handles C1 * C2 for vectors. Other vector folds are below.
12883     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12884       return FoldedVOp;
12885   }
12886 
12887   // fold (fmul c1, c2) -> c1*c2
12888   if (N0CFP && N1CFP)
12889     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12890 
12891   // canonicalize constant to RHS
12892   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12893      !isConstantFPBuildVectorOrConstantFP(N1))
12894     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12895 
12896   if (SDValue NewSel = foldBinOpIntoSelect(N))
12897     return NewSel;
12898 
12899   if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12900       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12901     // fold (fmul A, 0) -> 0
12902     if (N1CFP && N1CFP->isZero())
12903       return N1;
12904   }
12905 
12906   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12907     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
12908     if (isConstantFPBuildVectorOrConstantFP(N1) &&
12909         N0.getOpcode() == ISD::FMUL) {
12910       SDValue N00 = N0.getOperand(0);
12911       SDValue N01 = N0.getOperand(1);
12912       // Avoid an infinite loop by making sure that N00 is not a constant
12913       // (the inner multiply has not been constant folded yet).
12914       if (isConstantFPBuildVectorOrConstantFP(N01) &&
12915           !isConstantFPBuildVectorOrConstantFP(N00)) {
12916         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12917         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12918       }
12919     }
12920 
12921     // Match a special case: we convert X * 2.0 into fadd.
12922     // fmul (fadd X, X), C -> fmul X, 2.0 * C
12923     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12924         N0.getOperand(0) == N0.getOperand(1)) {
12925       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12926       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12927       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12928     }
12929   }
12930 
12931   // fold (fmul X, 2.0) -> (fadd X, X)
12932   if (N1CFP && N1CFP->isExactlyValue(+2.0))
12933     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12934 
12935   // fold (fmul X, -1.0) -> (fneg X)
12936   if (N1CFP && N1CFP->isExactlyValue(-1.0))
12937     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12938       return DAG.getNode(ISD::FNEG, DL, VT, N0);
12939 
12940   // -N0 * -N1 --> N0 * N1
12941   TargetLowering::NegatibleCost CostN0 =
12942       TargetLowering::NegatibleCost::Expensive;
12943   TargetLowering::NegatibleCost CostN1 =
12944       TargetLowering::NegatibleCost::Expensive;
12945   SDValue NegN0 =
12946       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
12947   SDValue NegN1 =
12948       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
12949   if (NegN0 && NegN1 &&
12950       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
12951        CostN1 == TargetLowering::NegatibleCost::Cheaper))
12952     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
12953 
12954   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12955   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
12956   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12957       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12958       TLI.isOperationLegal(ISD::FABS, VT)) {
12959     SDValue Select = N0, X = N1;
12960     if (Select.getOpcode() != ISD::SELECT)
12961       std::swap(Select, X);
12962 
12963     SDValue Cond = Select.getOperand(0);
12964     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12965     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
12966 
12967     if (TrueOpnd && FalseOpnd &&
12968         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12969         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12970         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12971       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12972       switch (CC) {
12973       default: break;
12974       case ISD::SETOLT:
12975       case ISD::SETULT:
12976       case ISD::SETOLE:
12977       case ISD::SETULE:
12978       case ISD::SETLT:
12979       case ISD::SETLE:
12980         std::swap(TrueOpnd, FalseOpnd);
12981         LLVM_FALLTHROUGH;
12982       case ISD::SETOGT:
12983       case ISD::SETUGT:
12984       case ISD::SETOGE:
12985       case ISD::SETUGE:
12986       case ISD::SETGT:
12987       case ISD::SETGE:
12988         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12989             TLI.isOperationLegal(ISD::FNEG, VT))
12990           return DAG.getNode(ISD::FNEG, DL, VT,
12991                    DAG.getNode(ISD::FABS, DL, VT, X));
12992         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12993           return DAG.getNode(ISD::FABS, DL, VT, X);
12994 
12995         break;
12996       }
12997     }
12998   }
12999 
13000   // FMUL -> FMA combines:
13001   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
13002     AddToWorklist(Fused.getNode());
13003     return Fused;
13004   }
13005 
13006   return SDValue();
13007 }
13008 
13009 SDValue DAGCombiner::visitFMA(SDNode *N) {
13010   SDValue N0 = N->getOperand(0);
13011   SDValue N1 = N->getOperand(1);
13012   SDValue N2 = N->getOperand(2);
13013   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13014   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13015   EVT VT = N->getValueType(0);
13016   SDLoc DL(N);
13017   const TargetOptions &Options = DAG.getTarget().Options;
13018 
13019   // FMA nodes have flags that propagate to the created nodes.
13020   const SDNodeFlags Flags = N->getFlags();
13021   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
13022 
13023   // Constant fold FMA.
13024   if (isa<ConstantFPSDNode>(N0) &&
13025       isa<ConstantFPSDNode>(N1) &&
13026       isa<ConstantFPSDNode>(N2)) {
13027     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
13028   }
13029 
13030   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
13031   TargetLowering::NegatibleCost CostN0 =
13032       TargetLowering::NegatibleCost::Expensive;
13033   TargetLowering::NegatibleCost CostN1 =
13034       TargetLowering::NegatibleCost::Expensive;
13035   SDValue NegN0 =
13036       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13037   SDValue NegN1 =
13038       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13039   if (NegN0 && NegN1 &&
13040       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13041        CostN1 == TargetLowering::NegatibleCost::Cheaper))
13042     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
13043 
13044   if (UnsafeFPMath) {
13045     if (N0CFP && N0CFP->isZero())
13046       return N2;
13047     if (N1CFP && N1CFP->isZero())
13048       return N2;
13049   }
13050   // TODO: The FMA node should have flags that propagate to these nodes.
13051   if (N0CFP && N0CFP->isExactlyValue(1.0))
13052     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
13053   if (N1CFP && N1CFP->isExactlyValue(1.0))
13054     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
13055 
13056   // Canonicalize (fma c, x, y) -> (fma x, c, y)
13057   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13058      !isConstantFPBuildVectorOrConstantFP(N1))
13059     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
13060 
13061   if (UnsafeFPMath) {
13062     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
13063     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
13064         isConstantFPBuildVectorOrConstantFP(N1) &&
13065         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
13066       return DAG.getNode(ISD::FMUL, DL, VT, N0,
13067                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
13068                                      Flags), Flags);
13069     }
13070 
13071     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
13072     if (N0.getOpcode() == ISD::FMUL &&
13073         isConstantFPBuildVectorOrConstantFP(N1) &&
13074         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13075       return DAG.getNode(ISD::FMA, DL, VT,
13076                          N0.getOperand(0),
13077                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
13078                                      Flags),
13079                          N2);
13080     }
13081   }
13082 
13083   // (fma x, 1, y) -> (fadd x, y)
13084   // (fma x, -1, y) -> (fadd (fneg x), y)
13085   if (N1CFP) {
13086     if (N1CFP->isExactlyValue(1.0))
13087       // TODO: The FMA node should have flags that propagate to this node.
13088       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
13089 
13090     if (N1CFP->isExactlyValue(-1.0) &&
13091         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
13092       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
13093       AddToWorklist(RHSNeg.getNode());
13094       // TODO: The FMA node should have flags that propagate to this node.
13095       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
13096     }
13097 
13098     // fma (fneg x), K, y -> fma x, -K, y
13099     if (N0.getOpcode() == ISD::FNEG &&
13100         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13101          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
13102                                               ForCodeSize)))) {
13103       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13104                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
13105     }
13106   }
13107 
13108   if (UnsafeFPMath) {
13109     // (fma x, c, x) -> (fmul x, (c+1))
13110     if (N1CFP && N0 == N2) {
13111       return DAG.getNode(ISD::FMUL, DL, VT, N0,
13112                          DAG.getNode(ISD::FADD, DL, VT, N1,
13113                                      DAG.getConstantFP(1.0, DL, VT), Flags),
13114                          Flags);
13115     }
13116 
13117     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
13118     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
13119       return DAG.getNode(ISD::FMUL, DL, VT, N0,
13120                          DAG.getNode(ISD::FADD, DL, VT, N1,
13121                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
13122                          Flags);
13123     }
13124   }
13125 
13126   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
13127   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
13128   if (!TLI.isFNegFree(VT))
13129     if (SDValue Neg = TLI.getCheaperNegatedExpression(
13130             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
13131       return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
13132   return SDValue();
13133 }
13134 
13135 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13136 // reciprocal.
13137 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
13138 // Notice that this is not always beneficial. One reason is that different
13139 // targets may have different costs for FDIV and FMUL, so sometimes two FDIVs
13140 // may cost less than one FDIV and two FMULs. Another reason is that the
13141 // critical path is increased from "one FDIV" to "one FDIV + one FMUL".
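// A minimal source-level sketch of the effect (assuming fast-math and a target
// whose combineRepeatedFPDivisors() threshold is met; names are illustrative):
//   float f(float a, float b, float d) { return a / d + b / d; }
// is rewritten so that only the reciprocal 1.0f / d is computed with an FDIV,
// and both quotients become FMULs against it.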
13142 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
13143   // TODO: Limit this transform based on optsize/minsize - it always creates at
13144   //       least 1 extra instruction. But the perf win may be substantial enough
13145   //       that only minsize should restrict this.
13146   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
13147   const SDNodeFlags Flags = N->getFlags();
13148   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
13149     return SDValue();
13150 
13151   // Skip if current node is a reciprocal/fneg-reciprocal.
13152   SDValue N0 = N->getOperand(0);
13153   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
13154   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
13155     return SDValue();
13156 
13157   // Exit early if the target does not want this transform or if there can't
13158   // possibly be enough uses of the divisor to make the transform worthwhile.
13159   SDValue N1 = N->getOperand(1);
13160   unsigned MinUses = TLI.combineRepeatedFPDivisors();
13161 
13162   // For splat vectors, scale the number of uses by the splat factor. If we can
13163   // convert the division into a scalar op, that will likely be much faster.
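        // For example, a single <4 x float> FDIV whose divisor is a splat
        // counts as four scalar uses toward the MinUses threshold below.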
13164   unsigned NumElts = 1;
13165   EVT VT = N->getValueType(0);
13166   if (VT.isVector() && DAG.isSplatValue(N1))
13167     NumElts = VT.getVectorNumElements();
13168 
13169   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
13170     return SDValue();
13171 
13172   // Find all FDIV users of the same divisor.
13173   // Use a set because duplicates may be present in the user list.
13174   SetVector<SDNode *> Users;
13175   for (auto *U : N1->uses()) {
13176     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
13177       // This division is eligible for optimization only if global unsafe math
13178       // is enabled or if this division allows reciprocal formation.
13179       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
13180         Users.insert(U);
13181     }
13182   }
13183 
13184   // Now that we have the actual number of divisor uses, make sure it meets
13185   // the minimum threshold specified by the target.
13186   if ((Users.size() * NumElts) < MinUses)
13187     return SDValue();
13188 
13189   SDLoc DL(N);
13190   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
13191   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
13192 
13193   // Dividend / Divisor -> Dividend * Reciprocal
13194   for (auto *U : Users) {
13195     SDValue Dividend = U->getOperand(0);
13196     if (Dividend != FPOne) {
13197       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
13198                                     Reciprocal, Flags);
13199       CombineTo(U, NewNode);
13200     } else if (U != Reciprocal.getNode()) {
13201       // In the absence of fast-math-flags, this user node is always the
13202       // same node as Reciprocal, but with FMF they may be different nodes.
13203       CombineTo(U, Reciprocal);
13204     }
13205   }
13206   return SDValue(N, 0);  // N was replaced.
13207 }
13208 
13209 SDValue DAGCombiner::visitFDIV(SDNode *N) {
13210   SDValue N0 = N->getOperand(0);
13211   SDValue N1 = N->getOperand(1);
13212   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13213   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13214   EVT VT = N->getValueType(0);
13215   SDLoc DL(N);
13216   const TargetOptions &Options = DAG.getTarget().Options;
13217   SDNodeFlags Flags = N->getFlags();
13218 
13219   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13220     return R;
13221 
13222   // fold vector ops
13223   if (VT.isVector())
13224     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13225       return FoldedVOp;
13226 
13227   // fold (fdiv c1, c2) -> c1/c2
13228   if (N0CFP && N1CFP)
13229     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
13230 
13231   if (SDValue NewSel = foldBinOpIntoSelect(N))
13232     return NewSel;
13233 
13234   if (SDValue V = combineRepeatedFPDivisors(N))
13235     return V;
13236 
13237   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
13238     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
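          // Worked examples: X / 4.0 -> X * 0.25 (1/4 is exact, opOK), while
          // X / 3.0 -> X * 0.333... (opInexact, still accepted below).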
13239     if (N1CFP) {
13240       // Compute the reciprocal 1.0 / c2.
13241       const APFloat &N1APF = N1CFP->getValueAPF();
13242       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
13243       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
13244       // Only do the transform if the reciprocal is a legal fp immediate that
13245       // isn't too nasty (e.g. NaN, denormal, ...).
13246       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
13247           (!LegalOperations ||
13248            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
13249            // backend)... we should handle this gracefully after Legalize.
13250            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
13251            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13252            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
13253         return DAG.getNode(ISD::FMUL, DL, VT, N0,
13254                            DAG.getConstantFP(Recip, DL, VT), Flags);
13255     }
13256 
13257     // If this FDIV is part of a reciprocal square root, it may be folded
13258     // into a target-specific square root estimate instruction.
13259     if (N1.getOpcode() == ISD::FSQRT) {
13260       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
13261         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
13262     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
13263                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13264       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
13265                                           Flags)) {
13266         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
13267         AddToWorklist(RV.getNode());
13268         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
13269       }
13270     } else if (N1.getOpcode() == ISD::FP_ROUND &&
13271                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13272       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
13273                                           Flags)) {
13274         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
13275         AddToWorklist(RV.getNode());
13276         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
13277       }
13278     } else if (N1.getOpcode() == ISD::FMUL) {
13279       // Look through an FMUL. Even though this won't remove the FDIV directly,
13280       // it's still worthwhile to get rid of the FSQRT if possible.
13281       SDValue Sqrt, Y;
13282       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13283         Sqrt = N1.getOperand(0);
13284         Y = N1.getOperand(1);
13285       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
13286         Sqrt = N1.getOperand(1);
13287         Y = N1.getOperand(0);
13288       }
13289       if (Sqrt.getNode()) {
13290         // If the other multiply operand is known positive, pull it into the
13291         // sqrt. That will eliminate the division if we convert to an estimate:
13292         // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
13293         // TODO: Also fold the case where A == Z (fabs is missing).
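              // (This uses fabs(A) == sqrt(A*A), so fabs(A) * sqrt(Z) ==
              // sqrt(A*A*Z); forming A*A*Z is why reassociation is required.)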
13294         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
13295             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
13296             Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
13297           SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
13298                                    Y.getOperand(0), Flags);
13299           SDValue AAZ =
13300               DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
13301           if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
13302             return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);
13303 
13304           // Estimate creation failed. Clean up speculatively created nodes.
13305           recursivelyDeleteUnusedNodes(AAZ.getNode());
13306         }
13307 
13308         // We found an FSQRT, so try to make this fold:
13309         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
13310         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
13311           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
13312           AddToWorklist(Div.getNode());
13313           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
13314         }
13315       }
13316     }
13317 
13318     // Fold into a reciprocal estimate and multiply instead of a real divide.
13319     if (Options.NoInfsFPMath || Flags.hasNoInfs())
13320       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
13321         return RV;
13322   }
13323 
13324   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
13325   TargetLowering::NegatibleCost CostN0 =
13326       TargetLowering::NegatibleCost::Expensive;
13327   TargetLowering::NegatibleCost CostN1 =
13328       TargetLowering::NegatibleCost::Expensive;
13329   SDValue NegN0 =
13330       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13331   SDValue NegN1 =
13332       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13333   if (NegN0 && NegN1 &&
13334       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13335        CostN1 == TargetLowering::NegatibleCost::Cheaper))
13336     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);
13337 
13338   return SDValue();
13339 }
13340 
13341 SDValue DAGCombiner::visitFREM(SDNode *N) {
13342   SDValue N0 = N->getOperand(0);
13343   SDValue N1 = N->getOperand(1);
13344   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13345   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13346   EVT VT = N->getValueType(0);
13347   SDNodeFlags Flags = N->getFlags();
13348 
13349   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13350     return R;
13351 
13352   // fold (frem c1, c2) -> fmod(c1,c2)
13353   if (N0CFP && N1CFP)
13354     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
13355 
13356   if (SDValue NewSel = foldBinOpIntoSelect(N))
13357     return NewSel;
13358 
13359   return SDValue();
13360 }
13361 
13362 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
13363   SDNodeFlags Flags = N->getFlags();
13364   const TargetOptions &Options = DAG.getTarget().Options;
13365 
13366   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
13367   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
13368   if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
13369       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
13370     return SDValue();
13371 
13372   SDValue N0 = N->getOperand(0);
13373   if (TLI.isFsqrtCheap(N0, DAG))
13374     return SDValue();
13375 
13376   // FSQRT nodes have flags that propagate to the created nodes.
13377   return buildSqrtEstimate(N0, Flags);
13378 }
13379 
13380 /// copysign(x, fp_extend(y)) -> copysign(x, y)
13381 /// copysign(x, fp_round(y)) -> copysign(x, y)
13382 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
13383   SDValue N1 = N->getOperand(1);
13384   if ((N1.getOpcode() == ISD::FP_EXTEND ||
13385        N1.getOpcode() == ISD::FP_ROUND)) {
13386     // Do not optimize out type conversions of the f128 type yet.
13387     // For some targets like x86_64, the configuration has been changed to
13388     // keep one f128 value in one SSE register, but instruction selection
13389     // cannot handle FCOPYSIGN on SSE registers yet.
13390     EVT N1VT = N1->getValueType(0);
13391     EVT N1Op0VT = N1->getOperand(0).getValueType();
13392     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
13393   }
13394   return false;
13395 }
13396 
13397 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
13398   SDValue N0 = N->getOperand(0);
13399   SDValue N1 = N->getOperand(1);
13400   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
13401   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
13402   EVT VT = N->getValueType(0);
13403 
13404   if (N0CFP && N1CFP) // Constant fold
13405     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
13406 
13407   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
13408     const APFloat &V = N1C->getValueAPF();
13409     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
13410     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
13411     if (!V.isNegative()) {
13412       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
13413         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13414     } else {
13415       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13416         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
13417                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
13418     }
13419   }
13420 
13421   // copysign(fabs(x), y) -> copysign(x, y)
13422   // copysign(fneg(x), y) -> copysign(x, y)
13423   // copysign(copysign(x,z), y) -> copysign(x, y)
13424   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
13425       N0.getOpcode() == ISD::FCOPYSIGN)
13426     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
13427 
13428   // copysign(x, abs(y)) -> abs(x)
13429   if (N1.getOpcode() == ISD::FABS)
13430     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13431 
13432   // copysign(x, copysign(y,z)) -> copysign(x, z)
13433   if (N1.getOpcode() == ISD::FCOPYSIGN)
13434     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
13435 
13436   // copysign(x, fp_extend(y)) -> copysign(x, y)
13437   // copysign(x, fp_round(y)) -> copysign(x, y)
13438   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
13439     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
13440 
13441   return SDValue();
13442 }
13443 
13444 SDValue DAGCombiner::visitFPOW(SDNode *N) {
13445   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
13446   if (!ExponentC)
13447     return SDValue();
13448 
13449   // Try to convert x ** (1/3) into cube root.
13450   // TODO: Handle the various flavors of long double.
13451   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
13452   //       Some range near 1/3 should be fine.
13453   EVT VT = N->getValueType(0);
13454   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
13455       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
13456     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
13457     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
13458     // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
13459     // For regular numbers, rounding may cause the results to differ.
13460     // Therefore, we require { nsz ninf nnan afn } for this transform.
13461     // TODO: We could select out the special cases if we don't have nsz/ninf.
13462     SDNodeFlags Flags = N->getFlags();
13463     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
13464         !Flags.hasApproximateFuncs())
13465       return SDValue();
13466 
13467     // Do not create a cbrt() libcall if the target does not have it, and do not
13468     // turn a pow that has lowering support into a cbrt() libcall.
13469     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
13470         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
13471          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
13472       return SDValue();
13473 
13474     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
13475   }
13476 
13477   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
13478   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
13479   // TODO: This could be extended (using a target hook) to handle smaller
13480   // power-of-2 fractional exponents.
13481   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
13482   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
13483   if (ExponentIs025 || ExponentIs075) {
13484     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
13485     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
13486     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
13487     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
13488     // For regular numbers, rounding may cause the results to differ.
13489     // Therefore, we require { nsz ninf afn } for this transform.
13490     // TODO: We could select out the special cases if we don't have nsz/ninf.
13491     SDNodeFlags Flags = N->getFlags();
13492 
13493     // We only need no signed zeros for the 0.25 case.
13494     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
13495         !Flags.hasApproximateFuncs())
13496       return SDValue();
13497 
13498     // Don't double the number of libcalls. We are trying to inline fast code.
13499     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
13500       return SDValue();
13501 
13502     // Assume that libcalls are the smallest code.
13503     // TODO: This restriction should probably be lifted for vectors.
13504     if (ForCodeSize)
13505       return SDValue();
13506 
13507     // pow(X, 0.25) --> sqrt(sqrt(X))
13508     SDLoc DL(N);
13509     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
13510     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
13511     if (ExponentIs025)
13512       return SqrtSqrt;
13513     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
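          // (exponent identity: X**0.75 == X**0.5 * X**0.25)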
13514     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
13515   }
13516 
13517   return SDValue();
13518 }
13519 
13520 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
13521                                const TargetLowering &TLI) {
13522   // This optimization is guarded by a function attribute because it may produce
13523   // unexpected results. I.e., programs may be relying on the platform-specific
13524   // undefined behavior when the float-to-int conversion overflows.
13525   const Function &F = DAG.getMachineFunction().getFunction();
13526   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
13527   if (StrictOverflow.getValueAsString().equals("false"))
13528     return SDValue();
13529 
13530   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
13531   // replacing casts with a libcall. We also must be allowed to ignore -0.0
13532   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
13533   // conversions would return +0.0.
13534   // FIXME: We should be able to use node-level FMF here.
13535   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
13536   EVT VT = N->getValueType(0);
13537   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
13538       !DAG.getTarget().Options.NoSignedZerosFPMath)
13539     return SDValue();
13540 
13541   // fptosi/fptoui round towards zero, so converting from FP to integer and
13542   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
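        // Illustrative: (double)(int64_t)2.7 == 2.0 == ftrunc(2.7), and
        // (double)(int64_t)-2.7 == -2.0 == ftrunc(-2.7).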
13543   SDValue N0 = N->getOperand(0);
13544   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
13545       N0.getOperand(0).getValueType() == VT)
13546     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13547 
13548   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
13549       N0.getOperand(0).getValueType() == VT)
13550     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13551 
13552   return SDValue();
13553 }
13554 
13555 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
13556   SDValue N0 = N->getOperand(0);
13557   EVT VT = N->getValueType(0);
13558   EVT OpVT = N0.getValueType();
13559 
13560   // [us]itofp(undef) = 0, because the result value is bounded.
13561   if (N0.isUndef())
13562     return DAG.getConstantFP(0.0, SDLoc(N), VT);
13563 
13564   // fold (sint_to_fp c1) -> c1fp
13565   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13566       // ...but only if the target supports immediate floating-point values
13567       (!LegalOperations ||
13568        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13569     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13570 
13571   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
13572   // but UINT_TO_FP is legal on this target, try to convert.
13573   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
13574       hasOperation(ISD::UINT_TO_FP, OpVT)) {
13575     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
13576     if (DAG.SignBitIsZero(N0))
13577       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13578   }
13579 
13580   // The next optimizations are desirable only if SELECT_CC can be lowered.
13581   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
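        // (An i1 true sign-extends to the signed value -1, hence the -1.0 here;
        // the zero-extended variant below produces 1.0 instead.)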
13582   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
13583       !VT.isVector() &&
13584       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13585     SDLoc DL(N);
13586     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
13587                          DAG.getConstantFP(0.0, DL, VT));
13588   }
13589 
13590   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
13591   //      (select (setcc x, y, cc), 1.0, 0.0)
13592   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
13593       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
13594       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13595     SDLoc DL(N);
13596     return DAG.getSelect(DL, VT, N0.getOperand(0),
13597                          DAG.getConstantFP(1.0, DL, VT),
13598                          DAG.getConstantFP(0.0, DL, VT));
13599   }
13600 
13601   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13602     return FTrunc;
13603 
13604   return SDValue();
13605 }
13606 
13607 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
13608   SDValue N0 = N->getOperand(0);
13609   EVT VT = N->getValueType(0);
13610   EVT OpVT = N0.getValueType();
13611 
13612   // [us]itofp(undef) = 0, because the result value is bounded.
13613   if (N0.isUndef())
13614     return DAG.getConstantFP(0.0, SDLoc(N), VT);
13615 
13616   // fold (uint_to_fp c1) -> c1fp
13617   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13618       // ...but only if the target supports immediate floating-point values
13619       (!LegalOperations ||
13620        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13621     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13622 
13623   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
13624   // but SINT_TO_FP is legal on this target, try to convert.
13625   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
13626       hasOperation(ISD::SINT_TO_FP, OpVT)) {
13627     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
13628     if (DAG.SignBitIsZero(N0))
13629       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13630   }
13631 
13632   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
13633   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
13634       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13635     SDLoc DL(N);
13636     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
13637                          DAG.getConstantFP(0.0, DL, VT));
13638   }
13639 
13640   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13641     return FTrunc;
13642 
13643   return SDValue();
13644 }
13645 
13646 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
13647 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
13648   SDValue N0 = N->getOperand(0);
13649   EVT VT = N->getValueType(0);
13650 
13651   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
13652     return SDValue();
13653 
13654   SDValue Src = N0.getOperand(0);
13655   EVT SrcVT = Src.getValueType();
13656   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
13657   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
13658 
13659   // We can safely assume the conversion won't overflow the output range,
13660   // because (for example) (uint8_t)18293.f is undefined behavior.
13661 
13662   // Since we can assume the conversion won't overflow, our decision as to
13663   // whether the input will fit in the float should depend on the minimum
13664   // of the input range and output range.
13665 
13666   // This means this is also safe for a signed input and unsigned output, since
13667   // a negative input would lead to undefined behavior.
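        // Worked example: i16 -> f32 -> i32 (both signed) has ActualSize =
        // min(15, 31) = 15, which fits in f32's 24-bit significand, so it folds
        // to sign_extend; i32 -> f32 -> i32 has ActualSize = 31 > 24 and is
        // rejected.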
13668   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
13669   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
13670   unsigned ActualSize = std::min(InputSize, OutputSize);
13671   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
13672 
13673   // We can only fold away the float conversion if the input range can be
13674   // represented exactly in the float range.
13675   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
13676     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
13677       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
13678                                                        : ISD::ZERO_EXTEND;
13679       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
13680     }
13681     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
13682       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
13683     return DAG.getBitcast(VT, Src);
13684   }
13685   return SDValue();
13686 }
13687 
13688 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13689   SDValue N0 = N->getOperand(0);
13690   EVT VT = N->getValueType(0);
13691 
13692   // fold (fp_to_sint undef) -> undef
13693   if (N0.isUndef())
13694     return DAG.getUNDEF(VT);
13695 
13696   // fold (fp_to_sint c1fp) -> c1
13697   if (isConstantFPBuildVectorOrConstantFP(N0))
13698     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13699 
13700   return FoldIntToFPToInt(N, DAG);
13701 }
13702 
13703 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13704   SDValue N0 = N->getOperand(0);
13705   EVT VT = N->getValueType(0);
13706 
13707   // fold (fp_to_uint undef) -> undef
13708   if (N0.isUndef())
13709     return DAG.getUNDEF(VT);
13710 
13711   // fold (fp_to_uint c1fp) -> c1
13712   if (isConstantFPBuildVectorOrConstantFP(N0))
13713     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13714 
13715   return FoldIntToFPToInt(N, DAG);
13716 }
13717 
13718 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
13719   SDValue N0 = N->getOperand(0);
13720   SDValue N1 = N->getOperand(1);
13721   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13722   EVT VT = N->getValueType(0);
13723 
13724   // fold (fp_round c1fp) -> c1fp
13725   if (N0CFP)
13726     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
13727 
13728   // fold (fp_round (fp_extend x)) -> x
13729   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
13730     return N0.getOperand(0);
13731 
13732   // fold (fp_round (fp_round x)) -> (fp_round x)
13733   if (N0.getOpcode() == ISD::FP_ROUND) {
13734     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
13735     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
13736 
13737     // Skip this folding if it results in an fp_round from f80 to f16.
13738     //
13739     // f80 to f16 always generates an expensive (and as yet, unimplemented)
13740     // libcall to __truncxfhf2 instead of selecting native f16 conversion
13741     // instructions from f32 or f64.  Moreover, the first (value-preserving)
13742     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
13743     // x86.
13744     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
13745       return SDValue();
13746 
13747     // If the first fp_round isn't a value-preserving truncation, it might
13748     // introduce a tie in the second fp_round that wouldn't occur in the
13749     // single-step fp_round we want to fold to.
13750     // In other words, double rounding isn't the same as rounding once.
13751     // Also, this is a value-preserving truncation iff both fp_rounds are.
13752     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
13753       SDLoc DL(N);
13754       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
13755                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
13756     }
13757   }
13758 
13759   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
13760   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
13761     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
13762                               N0.getOperand(0), N1);
13763     AddToWorklist(Tmp.getNode());
13764     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
13765                        Tmp, N0.getOperand(1));
13766   }
13767 
13768   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13769     return NewVSel;
13770 
13771   return SDValue();
13772 }
13773 
13774 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
13775   SDValue N0 = N->getOperand(0);
13776   EVT VT = N->getValueType(0);
13777 
13778   // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
13779   if (N->hasOneUse() &&
13780       N->use_begin()->getOpcode() == ISD::FP_ROUND)
13781     return SDValue();
13782 
13783   // fold (fp_extend c1fp) -> c1fp
13784   if (isConstantFPBuildVectorOrConstantFP(N0))
13785     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
13786 
13787   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
13788   if (N0.getOpcode() == ISD::FP16_TO_FP &&
13789       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
13790     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
13791 
13792   // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
13793   // value of X.
13794   if (N0.getOpcode() == ISD::FP_ROUND
13795       && N0.getConstantOperandVal(1) == 1) {
13796     SDValue In = N0.getOperand(0);
13797     if (In.getValueType() == VT) return In;
13798     if (VT.bitsLT(In.getValueType()))
13799       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
13800                          In, N0.getOperand(1));
13801     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
13802   }
13803 
13804   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
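        // (The extending load directly produces the wide value for this node;
        // the value-preserving fp_round created below rebuilds the original
        // narrow value for any other users of the load.)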
13805   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13806        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13807     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13808     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13809                                      LN0->getChain(),
13810                                      LN0->getBasePtr(), N0.getValueType(),
13811                                      LN0->getMemOperand());
13812     CombineTo(N, ExtLoad);
13813     CombineTo(N0.getNode(),
13814               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
13815                           N0.getValueType(), ExtLoad,
13816                           DAG.getIntPtrConstant(1, SDLoc(N0))),
13817               ExtLoad.getValue(1));
13818     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13819   }
13820 
13821   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13822     return NewVSel;
13823 
13824   return SDValue();
13825 }
13826 
13827 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13828   SDValue N0 = N->getOperand(0);
13829   EVT VT = N->getValueType(0);
13830 
13831   // fold (fceil c1) -> fceil(c1)
13832   if (isConstantFPBuildVectorOrConstantFP(N0))
13833     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13834 
13835   return SDValue();
13836 }
13837 
13838 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13839   SDValue N0 = N->getOperand(0);
13840   EVT VT = N->getValueType(0);
13841 
13842   // fold (ftrunc c1) -> ftrunc(c1)
13843   if (isConstantFPBuildVectorOrConstantFP(N0))
13844     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13845 
13846   // fold ftrunc (known rounded int x) -> x
13847   // ftrunc is part of the fptosi/fptoui expansion on some targets, so this is
13848   // likely to be generated when extracting an integer from a rounded FP value.
13849   switch (N0.getOpcode()) {
13850   default: break;
13851   case ISD::FRINT:
13852   case ISD::FTRUNC:
13853   case ISD::FNEARBYINT:
13854   case ISD::FFLOOR:
13855   case ISD::FCEIL:
13856     return N0;
13857   }
13858 
13859   return SDValue();
13860 }
13861 
13862 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13863   SDValue N0 = N->getOperand(0);
13864   EVT VT = N->getValueType(0);
13865 
13866   // fold (ffloor c1) -> ffloor(c1)
13867   if (isConstantFPBuildVectorOrConstantFP(N0))
13868     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13869 
13870   return SDValue();
13871 }
13872 
13873 // FIXME: FNEG and FABS have a lot in common; refactor.
13874 SDValue DAGCombiner::visitFNEG(SDNode *N) {
13875   SDValue N0 = N->getOperand(0);
13876   EVT VT = N->getValueType(0);
13877 
13878   // Constant fold FNEG.
13879   if (isConstantFPBuildVectorOrConstantFP(N0))
13880     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13881 
13882   if (SDValue NegN0 =
13883           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
13884     return NegN0;
13885 
13886   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
13887   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
13888   // know it was called from a context with a nsz flag if the input fsub does
13889   // not.
13890   if (N0.getOpcode() == ISD::FSUB &&
13891       (DAG.getTarget().Options.NoSignedZerosFPMath ||
13892        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
13893     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
13894                        N0.getOperand(0), N->getFlags());
13895   }
13896 
13897   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13898   // constant pool values.
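        // Illustrative for scalar f32: fneg(bitcast(i32 X)) becomes
        // bitcast(xor X, 0x80000000), flipping only the IEEE-754 sign bit.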
13899   if (!TLI.isFNegFree(VT) &&
13900       N0.getOpcode() == ISD::BITCAST &&
13901       N0.getNode()->hasOneUse()) {
13902     SDValue Int = N0.getOperand(0);
13903     EVT IntVT = Int.getValueType();
13904     if (IntVT.isInteger() && !IntVT.isVector()) {
13905       APInt SignMask;
13906       if (N0.getValueType().isVector()) {
13907         // For a vector, get a mask such as 0x80... per scalar element
13908         // and splat it.
13909         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
13910         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13911       } else {
13912         // For a scalar, just generate 0x80...
13913         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13914       }
13915       SDLoc DL0(N0);
13916       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13917                         DAG.getConstant(SignMask, DL0, IntVT));
13918       AddToWorklist(Int.getNode());
13919       return DAG.getBitcast(VT, Int);
13920     }
13921   }
13922 
13923   // (fneg (fmul c, x)) -> (fmul -c, x)
13924   if (N0.getOpcode() == ISD::FMUL &&
13925       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
13926     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13927     if (CFP1) {
13928       APFloat CVal = CFP1->getValueAPF();
13929       CVal.changeSign();
13930       if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
13931                        TLI.isOperationLegal(ISD::ConstantFP, VT)))
13932         return DAG.getNode(
13933             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13934             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
13935             N0->getFlags());
13936     }
13937   }
13938 
13939   return SDValue();
13940 }
13941 
13942 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13943                             APFloat (*Op)(const APFloat &, const APFloat &)) {
13944   SDValue N0 = N->getOperand(0);
13945   SDValue N1 = N->getOperand(1);
13946   EVT VT = N->getValueType(0);
13947   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13948   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13949 
13950   if (N0CFP && N1CFP) {
13951     const APFloat &C0 = N0CFP->getValueAPF();
13952     const APFloat &C1 = N1CFP->getValueAPF();
13953     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13954   }
13955 
13956   // Canonicalize to constant on RHS.
13957   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13958       !isConstantFPBuildVectorOrConstantFP(N1))
13959     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13960 
13961   return SDValue();
13962 }
13963 
13964 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
13965   return visitFMinMax(DAG, N, minnum);
13966 }
13967 
13968 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
13969   return visitFMinMax(DAG, N, maxnum);
13970 }
13971 
13972 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
13973   return visitFMinMax(DAG, N, minimum);
13974 }
13975 
13976 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
13977   return visitFMinMax(DAG, N, maximum);
13978 }
13979 
13980 SDValue DAGCombiner::visitFABS(SDNode *N) {
13981   SDValue N0 = N->getOperand(0);
13982   EVT VT = N->getValueType(0);
13983 
13984   // fold (fabs c1) -> fabs(c1)
13985   if (isConstantFPBuildVectorOrConstantFP(N0))
13986     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13987 
13988   // fold (fabs (fabs x)) -> (fabs x)
13989   if (N0.getOpcode() == ISD::FABS)
13990     return N->getOperand(0);
13991 
13992   // fold (fabs (fneg x)) -> (fabs x)
13993   // fold (fabs (fcopysign x, y)) -> (fabs x)
13994   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13995     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13996 
13997   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
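        // Illustrative for scalar f32: fabs(bitcast(i32 X)) becomes
        // bitcast(and X, 0x7fffffff), clearing only the IEEE-754 sign bit.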
13998   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
13999     SDValue Int = N0.getOperand(0);
14000     EVT IntVT = Int.getValueType();
14001     if (IntVT.isInteger() && !IntVT.isVector()) {
14002       APInt SignMask;
14003       if (N0.getValueType().isVector()) {
14004         // For a vector, get a mask such as 0x7f... per scalar element
14005         // and splat it.
14006         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
14007         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
14008       } else {
14009         // For a scalar, just generate 0x7f...
14010         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
14011       }
14012       SDLoc DL(N0);
14013       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
14014                         DAG.getConstant(SignMask, DL, IntVT));
14015       AddToWorklist(Int.getNode());
14016       return DAG.getBitcast(N->getValueType(0), Int);
14017     }
14018   }
14019 
14020   return SDValue();
14021 }
14022 
14023 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
14024   SDValue Chain = N->getOperand(0);
14025   SDValue N1 = N->getOperand(1);
14026   SDValue N2 = N->getOperand(2);
14027 
14028   // If N is a constant we could fold this into a fallthrough or unconditional
14029   // branch. However that doesn't happen very often in normal code, because
14030   // Instcombine/SimplifyCFG should have handled the available opportunities.
14031   // If we did this folding here, it would be necessary to update the
14032   // MachineBasicBlock CFG, which is awkward.
14033 
14034   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
14035   // on the target.
14036   if (N1.getOpcode() == ISD::SETCC &&
14037       TLI.isOperationLegalOrCustom(ISD::BR_CC,
14038                                    N1.getOperand(0).getValueType())) {
14039     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14040                        Chain, N1.getOperand(2),
14041                        N1.getOperand(0), N1.getOperand(1), N2);
14042   }
14043 
14044   if (N1.hasOneUse()) {
14045     // rebuildSetCC calls visitXor which may change the Chain when there is a
14046     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
14047     HandleSDNode ChainHandle(Chain);
14048     if (SDValue NewN1 = rebuildSetCC(N1))
14049       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
14050                          ChainHandle.getValue(), NewN1, N2);
14051   }
14052 
14053   return SDValue();
14054 }
14055 
14056 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
14057   if (N.getOpcode() == ISD::SRL ||
14058       (N.getOpcode() == ISD::TRUNCATE &&
14059        (N.getOperand(0).hasOneUse() &&
14060         N.getOperand(0).getOpcode() == ISD::SRL))) {
14061     // Look past the truncate.
14062     if (N.getOpcode() == ISD::TRUNCATE)
14063       N = N.getOperand(0);
14064 
14065     // Match this pattern so that we can generate simpler code:
14066     //
14067     //   %a = ...
14068     //   %b = and i32 %a, 2
14069     //   %c = srl i32 %b, 1
14070     //   brcond i32 %c ...
14071     //
14072     // into
14073     //
14074     //   %a = ...
14075     //   %b = and i32 %a, 2
14076     //   %c = setcc eq %b, 0
14077     //   brcond %c ...
14078     //
14079     // This applies only when the AND constant value has one bit set and the
14080     // SRL constant is equal to the log2 of the AND constant. The back-end is
14081     // smart enough to convert the result into a TEST/JMP sequence.
14082     SDValue Op0 = N.getOperand(0);
14083     SDValue Op1 = N.getOperand(1);
14084 
14085     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
14086       SDValue AndOp1 = Op0.getOperand(1);
14087 
14088       if (AndOp1.getOpcode() == ISD::Constant) {
14089         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
14090 
14091         if (AndConst.isPowerOf2() &&
14092             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
14093           SDLoc DL(N);
14094           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
14095                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
14096                               ISD::SETNE);
14097         }
14098       }
14099     }
14100   }
14101 
14102   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
14103   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
14104   if (N.getOpcode() == ISD::XOR) {
14105     // Because we may call this on a speculatively constructed
14106     // SimplifiedSetCC Node, we need to simplify this node first.
14107     // Ideally this should be folded into SimplifySetCC and not
14108     // here. For now, grab a handle to N so we don't lose it from
14109     // replacements internal to the visit.
14110     HandleSDNode XORHandle(N);
14111     while (N.getOpcode() == ISD::XOR) {
14112       SDValue Tmp = visitXOR(N.getNode());
14113       // No simplification done.
14114       if (!Tmp.getNode())
14115         break;
14116       // Returning N is a form of in-visit replacement that may invalidate
14117       // N. Grab the value from the handle.
14118       if (Tmp.getNode() == N.getNode())
14119         N = XORHandle.getValue();
14120       else // Node simplified. Try simplifying again.
14121         N = Tmp;
14122     }
14123 
14124     if (N.getOpcode() != ISD::XOR)
14125       return N;
14126 
14127     SDValue Op0 = N->getOperand(0);
14128     SDValue Op1 = N->getOperand(1);
14129 
14130     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
14131       bool Equal = false;
14132       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
14133       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
14134           Op0.getValueType() == MVT::i1) {
14135         N = Op0;
14136         Op0 = N->getOperand(0);
14137         Op1 = N->getOperand(1);
14138         Equal = true;
14139       }
14140 
14141       EVT SetCCVT = N.getValueType();
14142       if (LegalTypes)
14143         SetCCVT = getSetCCResultType(SetCCVT);
14144       // Replace the uses of XOR with SETCC
14145       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
14146                           Equal ? ISD::SETEQ : ISD::SETNE);
14147     }
14148   }
14149 
14150   return SDValue();
14151 }
14152 
14153 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
14154 //
14155 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
14156   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
14157   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
14158 
14159   // If N is a constant we could fold this into a fallthrough or unconditional
14160   // branch. However that doesn't happen very often in normal code, because
14161   // Instcombine/SimplifyCFG should have handled the available opportunities.
14162   // If we did this folding here, it would be necessary to update the
14163   // MachineBasicBlock CFG, which is awkward.
14164 
14165   // Use SimplifySetCC to simplify SETCC's.
14166   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
14167                                CondLHS, CondRHS, CC->get(), SDLoc(N),
14168                                false);
14169   if (Simp.getNode()) AddToWorklist(Simp.getNode());
14170 
14171   // fold to a simpler setcc
14172   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
14173     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14174                        N->getOperand(0), Simp.getOperand(2),
14175                        Simp.getOperand(0), Simp.getOperand(1),
14176                        N->getOperand(4));
14177 
14178   return SDValue();
14179 }
14180 
14181 /// Return true if 'Use' is a load or a store that uses N as its base pointer
14182 /// and that N may be folded in the load / store addressing mode.
14183 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
14184                                     SelectionDAG &DAG,
14185                                     const TargetLowering &TLI) {
14186   EVT VT;
14187   unsigned AS;
14188 
14189   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
14190     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14191       return false;
14192     VT = LD->getMemoryVT();
14193     AS = LD->getAddressSpace();
14194   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
14195     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14196       return false;
14197     VT = ST->getMemoryVT();
14198     AS = ST->getAddressSpace();
14199   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
14200     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14201       return false;
14202     VT = LD->getMemoryVT();
14203     AS = LD->getAddressSpace();
14204   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
14205     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14206       return false;
14207     VT = ST->getMemoryVT();
14208     AS = ST->getAddressSpace();
14209   } else
14210     return false;
14211 
14212   TargetLowering::AddrMode AM;
14213   if (N->getOpcode() == ISD::ADD) {
14214     AM.HasBaseReg = true;
14215     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14216     if (Offset)
14217       // [reg +/- imm]
14218       AM.BaseOffs = Offset->getSExtValue();
14219     else
14220       // [reg +/- reg]
14221       AM.Scale = 1;
14222   } else if (N->getOpcode() == ISD::SUB) {
14223     AM.HasBaseReg = true;
14224     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14225     if (Offset)
14226       // [reg +/- imm]
14227       AM.BaseOffs = -Offset->getSExtValue();
14228     else
14229       // [reg +/- reg]
14230       AM.Scale = 1;
14231   } else
14232     return false;
14233 
14234   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
14235                                    VT.getTypeForEVT(*DAG.getContext()), AS);
14236 }
14237 
14238 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
14239                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
14240                                      const TargetLowering &TLI) {
14241   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
14242     if (LD->isIndexed())
14243       return false;
14244     EVT VT = LD->getMemoryVT();
14245     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
14246       return false;
14247     Ptr = LD->getBasePtr();
14248   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
14249     if (ST->isIndexed())
14250       return false;
14251     EVT VT = ST->getMemoryVT();
14252     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
14253       return false;
14254     Ptr = ST->getBasePtr();
14255     IsLoad = false;
14256   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
14257     if (LD->isIndexed())
14258       return false;
14259     EVT VT = LD->getMemoryVT();
14260     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
14261         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
14262       return false;
14263     Ptr = LD->getBasePtr();
14264     IsMasked = true;
14265   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
14266     if (ST->isIndexed())
14267       return false;
14268     EVT VT = ST->getMemoryVT();
14269     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
14270         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
14271       return false;
14272     Ptr = ST->getBasePtr();
14273     IsLoad = false;
14274     IsMasked = true;
14275   } else {
14276     return false;
14277   }
14278   return true;
14279 }
14280 
14281 /// Try turning a load/store into a pre-indexed load/store when the base
14282 /// pointer is an add or subtract and it has other uses besides the load/store.
14283 /// After the transformation, the new indexed load/store has effectively folded
14284 /// the add/subtract in and all of its other uses are redirected to the
14285 /// new load/store.
14286 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
14287   if (Level < AfterLegalizeDAG)
14288     return false;
14289 
14290   bool IsLoad = true;
14291   bool IsMasked = false;
14292   SDValue Ptr;
14293   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
14294                                 Ptr, TLI))
14295     return false;
14296 
14297   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
14298   // out.  There is no reason to make this a preinc/predec.
14299   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
14300       Ptr.getNode()->hasOneUse())
14301     return false;
14302 
14303   // Ask the target to do addressing mode selection.
14304   SDValue BasePtr;
14305   SDValue Offset;
14306   ISD::MemIndexedMode AM = ISD::UNINDEXED;
14307   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
14308     return false;
14309 
14310   // Backends without true r+i pre-indexed forms may need to pass a
14311   // constant base with a variable offset so that constant coercion
14312   // will work with the patterns in canonical form.
14313   bool Swapped = false;
14314   if (isa<ConstantSDNode>(BasePtr)) {
14315     std::swap(BasePtr, Offset);
14316     Swapped = true;
14317   }
14318 
14319   // Don't create an indexed load / store with zero offset.
14320   if (isNullConstant(Offset))
14321     return false;
14322 
14323   // Try turning it into a pre-indexed load / store except when:
14324   // 1) The new base ptr is a frame index.
14325   // 2) If N is a store and the new base ptr is either the same as or is a
14326   //    predecessor of the value being stored.
14327   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
14328   //    that would create a cycle.
14329   // 4) All uses are load / store ops that use it as old base ptr.
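        //
        // Illustrative sketch of the rewrite (the exact node forms are
        // target-dependent):
        //   t0 = add ptr, c
        //   v  = load t0
        //   ...   (other uses of t0)
        // becomes
        //   v, t0' = pre-inc load ptr, c
        //   ...   (other uses redirected to t0')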
14330 
14331   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
14332   // (plus the implicit offset) to a register to preinc anyway.
14333   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
14334     return false;
14335 
14336   // Check #2.
14337   if (!IsLoad) {
14338     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
14339                            : cast<StoreSDNode>(N)->getValue();
14340 
14341     // Would require a copy.
14342     if (Val == BasePtr)
14343       return false;
14344 
14345     // Would create a cycle.
14346     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
14347       return false;
14348   }
14349 
14350   // Caches for hasPredecessorHelper.
14351   SmallPtrSet<const SDNode *, 32> Visited;
14352   SmallVector<const SDNode *, 16> Worklist;
14353   Worklist.push_back(N);
14354 
14355   // If the offset is a constant, there may be other adds of constants that
14356   // can be folded with this one. We should do this to avoid having to keep
14357   // a copy of the original base pointer.
14358   SmallVector<SDNode *, 16> OtherUses;
14359   if (isa<ConstantSDNode>(Offset))
14360     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
14361                               UE = BasePtr.getNode()->use_end();
14362          UI != UE; ++UI) {
14363       SDUse &Use = UI.getUse();
14364       // Skip the use that is Ptr and uses of other results from BasePtr's
14365       // node (important for nodes that return multiple results).
14366       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
14367         continue;
14368 
14369       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
14370         continue;
14371 
14372       if (Use.getUser()->getOpcode() != ISD::ADD &&
14373           Use.getUser()->getOpcode() != ISD::SUB) {
14374         OtherUses.clear();
14375         break;
14376       }
14377 
14378       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
14379       if (!isa<ConstantSDNode>(Op1)) {
14380         OtherUses.clear();
14381         break;
14382       }
14383 
14384       // FIXME: In some cases, we can be smarter about this.
14385       if (Op1.getValueType() != Offset.getValueType()) {
14386         OtherUses.clear();
14387         break;
14388       }
14389 
14390       OtherUses.push_back(Use.getUser());
14391     }
14392 
14393   if (Swapped)
14394     std::swap(BasePtr, Offset);
14395 
14396   // Now check for #3 and #4.
14397   bool RealUse = false;
14398 
14399   for (SDNode *Use : Ptr.getNode()->uses()) {
14400     if (Use == N)
14401       continue;
14402     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
14403       return false;
14404 
14405     // If Ptr may be folded into the addressing mode of another use, then
14406     // it's not profitable to do this transformation.
14407     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
14408       RealUse = true;
14409   }
14410 
14411   if (!RealUse)
14412     return false;
14413 
14414   SDValue Result;
14415   if (!IsMasked) {
14416     if (IsLoad)
14417       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14418     else
14419       Result =
14420           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14421   } else {
14422     if (IsLoad)
14423       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14424                                         Offset, AM);
14425     else
14426       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
14427                                          Offset, AM);
14428   }
14429   ++PreIndexedNodes;
14430   ++NodesCombined;
14431   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
14432              Result.getNode()->dump(&DAG); dbgs() << '\n');
14433   WorklistRemover DeadNodes(*this);
14434   if (IsLoad) {
14435     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14436     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14437   } else {
14438     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14439   }
14440 
14441   // Finally, since the node is now dead, remove it from the graph.
14442   deleteAndRecombine(N);
14443 
14444   if (Swapped)
14445     std::swap(BasePtr, Offset);
14446 
14447   // Replace other uses of BasePtr that can be updated to use Ptr
14448   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
14449     unsigned OffsetIdx = 1;
14450     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
14451       OffsetIdx = 0;
14452     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
14453            BasePtr.getNode() && "Expected BasePtr operand");
14454 
14455     // We need to replace ptr0 in the following expression:
14456     //   x0 * offset0 + y0 * ptr0 = t0
14457     // knowing that
14458     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
14459     //
14460     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
14461     // indexed load/store and the expression that needs to be re-written.
14462     //
14463     // Therefore, we have:
14464     //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
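          //
          // For example (hypothetical values): if the indexed access gives
          //   t1 = ptr0 + 8   (x1 = y1 = 1, offset1 = 8)
          // and the other use is
          //   t0 = ptr0 + 20  (x0 = y0 = 1, offset0 = 20)
          // then t0 = (20 - 8) + t1 = t1 + 12, so the ADD below is rebuilt
          // with constant 12 and the indexed result t1.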
14465 
14466     ConstantSDNode *CN =
14467       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
14468     int X0, X1, Y0, Y1;
14469     const APInt &Offset0 = CN->getAPIntValue();
14470     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
14471 
14472     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
14473     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
14474     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
14475     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
14476 
14477     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
14478 
14479     APInt CNV = Offset0;
14480     if (X0 < 0) CNV = -CNV;
14481     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
14482     else CNV = CNV - Offset1;
14483 
14484     SDLoc DL(OtherUses[i]);
14485 
14486     // We can now generate the new expression.
14487     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
14488     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
14489 
14490     SDValue NewUse = DAG.getNode(Opcode,
14491                                  DL,
14492                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
14493     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
14494     deleteAndRecombine(OtherUses[i]);
14495   }
14496 
14497   // Replace the uses of Ptr with uses of the updated base value.
14498   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
14499   deleteAndRecombine(Ptr.getNode());
14500   AddToWorklist(Result.getNode());
14501 
14502   return true;
14503 }
14504 
14505 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
14506                                    SDValue &BasePtr, SDValue &Offset,
14507                                    ISD::MemIndexedMode &AM,
14508                                    SelectionDAG &DAG,
14509                                    const TargetLowering &TLI) {
14510   if (PtrUse == N ||
14511       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
14512     return false;
14513 
14514   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
14515     return false;
14516 
14517   // Don't create an indexed load / store with zero offset.
14518   if (isNullConstant(Offset))
14519     return false;
14520 
14521   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
14522     return false;
14523 
14524   SmallPtrSet<const SDNode *, 32> Visited;
14525   for (SDNode *Use : BasePtr.getNode()->uses()) {
14526     if (Use == Ptr.getNode())
14527       continue;
14528 
14529     // Don't transform if there's a later user which could perform the index instead.
14530     if (isa<MemSDNode>(Use)) {
14531       bool IsLoad = true;
14532       bool IsMasked = false;
14533       SDValue OtherPtr;
14534       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
14535                                    IsMasked, OtherPtr, TLI)) {
14536         SmallVector<const SDNode *, 2> Worklist;
14537         Worklist.push_back(Use);
14538         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
14539           return false;
14540       }
14541     }
14542 
14543     // If all the uses are load / store addresses, then don't do the
14544     // transformation.
14545     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
14546       for (SDNode *UseUse : Use->uses())
14547         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
14548           return false;
14549     }
14550   }
14551   return true;
14552 }
14553 
14554 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
14555                                          bool &IsMasked, SDValue &Ptr,
14556                                          SDValue &BasePtr, SDValue &Offset,
14557                                          ISD::MemIndexedMode &AM,
14558                                          SelectionDAG &DAG,
14559                                          const TargetLowering &TLI) {
14560   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
14561                                 IsMasked, Ptr, TLI) ||
14562       Ptr.getNode()->hasOneUse())
14563     return nullptr;
14564 
14565   // Try turning it into a post-indexed load / store except when
14566   // 1) All uses are load / store ops that use it as base ptr (and
14567   //    it may be folded as addressing mode).
14568   // 2) Op must be independent of N, i.e. Op is neither a predecessor
14569   //    nor a successor of N. Otherwise, if Op is folded that would
14570   //    create a cycle.
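        //
        // Illustrative sketch of the rewrite (the exact node forms are
        // target-dependent):
        //   v  = load ptr
        //   t0 = add ptr, c     (Op)
        // becomes
        //   v, t0' = post-inc load ptr, c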
14571   for (SDNode *Op : Ptr->uses()) {
14572     // Check for #1.
14573     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
14574       continue;
14575 
14576     // Check for #2.
14577     SmallPtrSet<const SDNode *, 32> Visited;
14578     SmallVector<const SDNode *, 8> Worklist;
14579     // Ptr is predecessor to both N and Op.
14580     Visited.insert(Ptr.getNode());
14581     Worklist.push_back(N);
14582     Worklist.push_back(Op);
14583     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
14584         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
14585       return Op;
14586   }
14587   return nullptr;
14588 }
14589 
14590 /// Try to combine a load/store with an add/sub of the base pointer node into
14591 /// a post-indexed load/store. The transformation effectively folds the
14592 /// add/subtract into the new indexed load/store, and all of its uses are
14593 /// redirected to the new load/store.
14594 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
14595   if (Level < AfterLegalizeDAG)
14596     return false;
14597 
14598   bool IsLoad = true;
14599   bool IsMasked = false;
14600   SDValue Ptr;
14601   SDValue BasePtr;
14602   SDValue Offset;
14603   ISD::MemIndexedMode AM = ISD::UNINDEXED;
14604   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
14605                                          Offset, AM, DAG, TLI);
14606   if (!Op)
14607     return false;
14608 
14609   SDValue Result;
14610   if (!IsMasked)
14611     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14612                                          Offset, AM)
14613                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
14614                                           BasePtr, Offset, AM);
14615   else
14616     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
14617                                                BasePtr, Offset, AM)
14618                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
14619                                                 BasePtr, Offset, AM);
14620   ++PostIndexedNodes;
14621   ++NodesCombined;
14622   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
14623              dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
14624              dbgs() << '\n');
14625   WorklistRemover DeadNodes(*this);
14626   if (IsLoad) {
14627     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14628     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14629   } else {
14630     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14631   }
14632 
14633   // Finally, since the node is now dead, remove it from the graph.
14634   deleteAndRecombine(N);
14635 
14636   // Replace the uses of Op with uses of the updated base value.
14637   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
14638                                 Result.getValue(IsLoad ? 1 : 0));
14639   deleteAndRecombine(Op);
14640   return true;
14641 }
14642 
14643 /// Return the base-pointer arithmetic from an indexed \p LD.
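      /// For example, for a PRE_INC or POST_INC load this returns
      /// (add BasePtr, Offset), and for PRE_DEC or POST_DEC it returns
      /// (sub BasePtr, Offset).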
14644 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
14645   ISD::MemIndexedMode AM = LD->getAddressingMode();
14646   assert(AM != ISD::UNINDEXED);
14647   SDValue BP = LD->getOperand(1);
14648   SDValue Inc = LD->getOperand(2);
14649 
14650   // Some backends use TargetConstants for load offsets, but don't expect
14651   // TargetConstants in general ADD nodes. We can convert these constants into
14652   // regular Constants (if the constant is not opaque).
14653   assert((Inc.getOpcode() != ISD::TargetConstant ||
14654           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
14655          "Cannot split out indexing using opaque target constants");
14656   if (Inc.getOpcode() == ISD::TargetConstant) {
14657     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
14658     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
14659                           ConstInc->getValueType(0));
14660   }
14661 
14662   unsigned Opc =
14663       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
14664   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
14665 }
14666 
14667 static inline int numVectorEltsOrZero(EVT T) {
14668   return T.isVector() ? T.getVectorNumElements() : 0;
14669 }
14670 
14671 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
14672   Val = ST->getValue();
14673   EVT STType = Val.getValueType();
14674   EVT STMemType = ST->getMemoryVT();
14675   if (STType == STMemType)
14676     return true;
14677   if (isTypeLegal(STMemType))
14678     return false; // fail.
14679   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
14680       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
14681     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
14682     return true;
14683   }
14684   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
14685       STType.isInteger() && STMemType.isInteger()) {
14686     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
14687     return true;
14688   }
14689   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
14690     Val = DAG.getBitcast(STMemType, Val);
14691     return true;
14692   }
14693   return false; // fail.
14694 }
14695 
14696 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
14697   EVT LDMemType = LD->getMemoryVT();
14698   EVT LDType = LD->getValueType(0);
14699   assert(Val.getValueType() == LDMemType &&
14700          "Attempting to extend value of non-matching type");
14701   if (LDType == LDMemType)
14702     return true;
14703   if (LDMemType.isInteger() && LDType.isInteger()) {
14704     switch (LD->getExtensionType()) {
14705     case ISD::NON_EXTLOAD:
14706       Val = DAG.getBitcast(LDType, Val);
14707       return true;
14708     case ISD::EXTLOAD:
14709       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
14710       return true;
14711     case ISD::SEXTLOAD:
14712       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
14713       return true;
14714     case ISD::ZEXTLOAD:
14715       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14716       return true;
14717     }
14718   }
14719   return false;
14720 }
14721 
14722 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
14723   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
14724     return SDValue();
14725   SDValue Chain = LD->getOperand(0);
14726   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
14727   // TODO: Relax this restriction for unordered atomics (see D66309)
14728   if (!ST || !ST->isSimple())
14729     return SDValue();
14730 
14731   EVT LDType = LD->getValueType(0);
14732   EVT LDMemType = LD->getMemoryVT();
14733   EVT STMemType = ST->getMemoryVT();
14734   EVT STType = ST->getValue().getValueType();
14735 
14736   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
14737   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
14738   int64_t Offset;
14739   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
14740     return SDValue();
14741 
14742   // Normalize for endianness. After this, Offset=0 will denote that the least
14743   // significant bit in the loaded value maps to the least significant bit in
14744   // the stored value. With Offset=n (for n > 0) the loaded value starts at the
14745   // n:th least significant byte of the stored value.
14746   if (DAG.getDataLayout().isBigEndian())
14747     Offset = ((int64_t)STMemType.getStoreSizeInBits() -
14748               (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
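        // For example (illustrative): for a 4-byte store and a 1-byte load of
        // the same address on a big-endian target, the matched Offset is 0,
        // but the loaded byte is the most significant stored byte, so the
        // normalized Offset becomes (32 - 8) / 8 - 0 = 3.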
14749 
14750   // Check that the stored value covers all bits that are loaded.
14751   bool STCoversLD =
14752       (Offset >= 0) &&
14753       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
14754 
14755   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
14756     if (LD->isIndexed()) {
14757       // Cannot handle opaque target constants and we must respect the user's
14758       // request not to split indexes from loads.
14759       if (!canSplitIdx(LD))
14760         return SDValue();
14761       SDValue Idx = SplitIndexingFromLoad(LD);
14762       SDValue Ops[] = {Val, Idx, Chain};
14763       return CombineTo(LD, Ops, 3);
14764     }
14765     return CombineTo(LD, Val, Chain);
14766   };
14767 
14768   if (!STCoversLD)
14769     return SDValue();
14770 
14771   // Memory as copy space (potentially masked).
14772   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
14773     // Simple case: Direct non-truncating forwarding
14774     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
14775       return ReplaceLd(LD, ST->getValue(), Chain);
14776     // Can we model the truncate and extension with an and mask?
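          // For example (illustrative): an i32 value stored with an
          // i8-truncating store and reloaded with an i8 zext-load can be
          // forwarded as (and storedval, 0xFF).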
14777     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
14778         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
14779       // Mask to size of LDMemType
14780       auto Mask =
14781           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
14782                                                STMemType.getSizeInBits()),
14783                           SDLoc(ST), STType);
14784       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
14785       return ReplaceLd(LD, Val, Chain);
14786     }
14787   }
14788 
14789   // TODO: Deal with nonzero offset.
14790   if (LD->getBasePtr().isUndef() || Offset != 0)
14791     return SDValue();
14792   // Model necessary truncations / extensions.
14793   SDValue Val;
14794   // Truncate the value to the stored memory size.
14795   do {
14796     if (!getTruncatedStoreValue(ST, Val))
14797       continue;
14798     if (!isTypeLegal(LDMemType))
14799       continue;
14800     if (STMemType != LDMemType) {
14801       // TODO: Support vectors? This requires extract_subvector/bitcast.
14802       if (!STMemType.isVector() && !LDMemType.isVector() &&
14803           STMemType.isInteger() && LDMemType.isInteger())
14804         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
14805       else
14806         continue;
14807     }
14808     if (!extendLoadedValueToExtension(LD, Val))
14809       continue;
14810     return ReplaceLd(LD, Val, Chain);
14811   } while (false);
14812 
14813   // On failure, cleanup dead nodes we may have created.
14814   if (Val->use_empty())
14815     deleteAndRecombine(Val.getNode());
14816   return SDValue();
14817 }
14818 
14819 SDValue DAGCombiner::visitLOAD(SDNode *N) {
14820   LoadSDNode *LD  = cast<LoadSDNode>(N);
14821   SDValue Chain = LD->getChain();
14822   SDValue Ptr   = LD->getBasePtr();
14823 
14824   // If load is not volatile and there are no uses of the loaded value (and
14825   // the updated indexed value in case of indexed loads), change uses of the
14826   // chain value into uses of the chain input (i.e. delete the dead load).
14827   // TODO: Allow this for unordered atomics (see D66309)
14828   if (LD->isSimple()) {
14829     if (N->getValueType(1) == MVT::Other) {
14830       // Unindexed loads.
14831       if (!N->hasAnyUseOfValue(0)) {
14832         // It's not safe to use the two value CombineTo variant here. e.g.
14833         // v1, chain2 = load chain1, loc
14834         // v2, chain3 = load chain2, loc
14835         // v3         = add v2, c
14836         // Now we replace use of chain2 with chain1.  This makes the second load
14837         // isomorphic to the one we are deleting, and thus makes this load live.
14838         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
14839                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
14840                    dbgs() << "\n");
14841         WorklistRemover DeadNodes(*this);
14842         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14843         AddUsersToWorklist(Chain.getNode());
14844         if (N->use_empty())
14845           deleteAndRecombine(N);
14846 
14847         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14848       }
14849     } else {
14850       // Indexed loads.
14851       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
14852 
14853       // If this load has an opaque TargetConstant offset, then we cannot split
14854       // the indexing into an add/sub directly (that TargetConstant may not be
14855       // valid for a different type of node, and we cannot convert an opaque
14856       // target constant into a regular constant).
14857       bool CanSplitIdx = canSplitIdx(LD);
14858 
14859       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
14860         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
14861         SDValue Index;
14862         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
14863           Index = SplitIndexingFromLoad(LD);
14864           // Try to fold the base pointer arithmetic into subsequent loads and
14865           // stores.
14866           AddUsersToWorklist(N);
14867         } else
14868           Index = DAG.getUNDEF(N->getValueType(1));
14869         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
14870                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
14871                    dbgs() << " and 2 other values\n");
14872         WorklistRemover DeadNodes(*this);
14873         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
14874         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
14875         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
14876         deleteAndRecombine(N);
14877         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14878       }
14879     }
14880   }
14881 
14882   // If this load is directly stored, replace the load value with the stored
14883   // value.
14884   if (auto V = ForwardStoreValueToDirectLoad(LD))
14885     return V;
14886 
14887   // Try to infer better alignment information than the load already has.
14888   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
14889     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
14890       if (*Alignment > LD->getAlign() &&
14891           isAligned(*Alignment, LD->getSrcValueOffset())) {
14892         SDValue NewLoad = DAG.getExtLoad(
14893             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
14894             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
14895             LD->getMemOperand()->getFlags(), LD->getAAInfo());
14896         // NewLoad will always be N as we are only refining the alignment
14897         assert(NewLoad.getNode() == N);
14898         (void)NewLoad;
14899       }
14900     }
14901   }
14902 
14903   if (LD->isUnindexed()) {
14904     // Walk up chain skipping non-aliasing memory nodes.
14905     SDValue BetterChain = FindBetterChain(LD, Chain);
14906 
14907     // If there is a better chain.
14908     if (Chain != BetterChain) {
14909       SDValue ReplLoad;
14910 
14911       // Replace the chain to avoid the dependency.
14912       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
14913         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
14914                                BetterChain, Ptr, LD->getMemOperand());
14915       } else {
14916         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
14917                                   LD->getValueType(0),
14918                                   BetterChain, Ptr, LD->getMemoryVT(),
14919                                   LD->getMemOperand());
14920       }
14921 
14922       // Create token factor to keep old chain connected.
14923       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
14924                                   MVT::Other, Chain, ReplLoad.getValue(1));
14925 
14926       // Replace uses with load result and token factor
14927       return CombineTo(N, ReplLoad.getValue(0), Token);
14928     }
14929   }
14930 
14931   // Try transforming N to an indexed load.
14932   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14933     return SDValue(N, 0);
14934 
14935   // Try to slice up N into more direct loads if the slices are mapped to
14936   // different register banks or pairing can take place.
14937   if (SliceUpLoad(N))
14938     return SDValue(N, 0);
14939 
14940   return SDValue();
14941 }
14942 
14943 namespace {
14944 
14945 /// Helper structure used to slice a load in smaller loads.
14946 /// Basically a slice is obtained from the following sequence:
14947 /// Origin = load Ty1, Base
14948 /// Shift = srl Ty1 Origin, CstTy Amount
14949 /// Inst = trunc Shift to Ty2
14950 ///
14951 /// Then, it will be rewritten into:
14952 /// Slice = load SliceTy, Base + SliceOffset
14953 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14954 ///
14955 /// SliceTy is deduced from the number of bits that are actually used to
14956 /// build Inst.
14957 struct LoadedSlice {
14958   /// Helper structure used to compute the cost of a slice.
14959   struct Cost {
14960     /// Are we optimizing for code size.
14961     bool ForCodeSize = false;
14962 
14963     /// Various costs.
14964     unsigned Loads = 0;
14965     unsigned Truncates = 0;
14966     unsigned CrossRegisterBanksCopies = 0;
14967     unsigned ZExts = 0;
14968     unsigned Shift = 0;
14969 
14970     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
14971 
14972     /// Get the cost of one isolated slice.
14973     Cost(const LoadedSlice &LS, bool ForCodeSize)
14974         : ForCodeSize(ForCodeSize), Loads(1) {
14975       EVT TruncType = LS.Inst->getValueType(0);
14976       EVT LoadedType = LS.getLoadedType();
14977       if (TruncType != LoadedType &&
14978           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14979         ZExts = 1;
14980     }
14981 
14982     /// Account for slicing gain in the current cost.
14983     /// Slicing provides a few gains, like removing a shift or a
14984     /// truncate. This method allows the cost of the original
14985     /// load to grow with the gain from this slice.
14986     void addSliceGain(const LoadedSlice &LS) {
14987       // Each slice saves a truncate.
14988       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14989       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14990                               LS.Inst->getValueType(0)))
14991         ++Truncates;
14992       // If there is a shift amount, this slice gets rid of it.
14993       if (LS.Shift)
14994         ++Shift;
14995       // If this slice can merge a cross register bank copy, account for it.
14996       if (LS.canMergeExpensiveCrossRegisterBankCopy())
14997         ++CrossRegisterBanksCopies;
14998     }
14999 
15000     Cost &operator+=(const Cost &RHS) {
15001       Loads += RHS.Loads;
15002       Truncates += RHS.Truncates;
15003       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
15004       ZExts += RHS.ZExts;
15005       Shift += RHS.Shift;
15006       return *this;
15007     }
15008 
15009     bool operator==(const Cost &RHS) const {
15010       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
15011              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
15012              ZExts == RHS.ZExts && Shift == RHS.Shift;
15013     }
15014 
15015     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
15016 
15017     bool operator<(const Cost &RHS) const {
15018       // Assume cross-register-bank copies are as expensive as loads.
15019       // FIXME: Do we want some more target hooks?
15020       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
15021       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
15022       // Unless we are optimizing for code size, consider the
15023       // expensive operation first.
15024       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
15025         return ExpensiveOpsLHS < ExpensiveOpsRHS;
15026       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
15027              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
15028     }
15029 
15030     bool operator>(const Cost &RHS) const { return RHS < *this; }
15031 
15032     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
15033 
15034     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
15035   };
15036 
15037   // The last instruction that represents the slice. This should be a
15038   // truncate instruction.
15039   SDNode *Inst;
15040 
15041   // The original load instruction.
15042   LoadSDNode *Origin;
15043 
15044   // The right shift amount in bits from the original load.
15045   unsigned Shift;
15046 
15047   // The DAG from which Origin came from.
15048   // This is used to get some contextual information about legal types, etc.
15049   SelectionDAG *DAG;
15050 
15051   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
15052               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
15053       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15054 
15055   /// Get the bits used in a chunk of bits \p BitWidth large.
15056   /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
15057   ///         unused bits set to 0.
15058   APInt getUsedBits() const {
15059     // Reproduce the trunc(lshr) sequence:
15060     // - Start from the truncated value.
15061     // - Zero extend to the desired bit width.
15062     // - Shift left.
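          // For example (illustrative): an i8 truncate of (srl i32 x, 16)
          // yields UsedBits = 0x00FF0000.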
15063     assert(Origin && "No original load to compare against.");
15064     unsigned BitWidth = Origin->getValueSizeInBits(0);
15065     assert(Inst && "This slice is not bound to an instruction");
15066     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
15067            "Extracted slice is bigger than the whole type!");
15068     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15069     UsedBits.setAllBits();
15070     UsedBits = UsedBits.zext(BitWidth);
15071     UsedBits <<= Shift;
15072     return UsedBits;
15073   }
15074 
15075   /// Get the size of the slice to be loaded in bytes.
15076   unsigned getLoadedSize() const {
15077     unsigned SliceSize = getUsedBits().countPopulation();
15078     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
15079     return SliceSize / 8;
15080   }
15081 
15082   /// Get the type that will be loaded for this slice.
15083   /// Note: This may not be the final type for the slice.
15084   EVT getLoadedType() const {
15085     assert(DAG && "Missing context");
15086     LLVMContext &Ctxt = *DAG->getContext();
15087     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15088   }
15089 
15090   /// Get the alignment of the load used for this slice.
15091   Align getAlign() const {
15092     Align Alignment = Origin->getAlign();
15093     uint64_t Offset = getOffsetFromBase();
15094     if (Offset != 0)
15095       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15096     return Alignment;
15097   }
15098 
15099   /// Check if this slice can be rewritten with legal operations.
15100   bool isLegal() const {
15101     // An invalid slice is not legal.
15102     if (!Origin || !Inst || !DAG)
15103       return false;
15104 
15105     // Offsets are for indexed loads only; we do not handle those.
15106     if (!Origin->getOffset().isUndef())
15107       return false;
15108 
15109     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15110 
15111     // Check that the type is legal.
15112     EVT SliceType = getLoadedType();
15113     if (!TLI.isTypeLegal(SliceType))
15114       return false;
15115 
15116     // Check that the load is legal for this type.
15117     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15118       return false;
15119 
15120     // Check that the offset can be computed.
15121     // 1. Check its type.
15122     EVT PtrType = Origin->getBasePtr().getValueType();
15123     if (PtrType == MVT::Untyped || PtrType.isExtended())
15124       return false;
15125 
15126     // 2. Check that it fits in the immediate.
15127     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15128       return false;
15129 
15130     // 3. Check that the computation is legal.
15131     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15132       return false;
15133 
15134     // Check that the zext is legal if it needs one.
15135     EVT TruncateType = Inst->getValueType(0);
15136     if (TruncateType != SliceType &&
15137         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15138       return false;
15139 
15140     return true;
15141   }
15142 
15143   /// Get the offset in bytes of this slice in the original chunk of
15144   /// bits.
15145   /// \pre DAG != nullptr.
15146   uint64_t getOffsetFromBase() const {
15147     assert(DAG && "Missing context.");
15148     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
15149     assert(!(Shift & 0x7) && "Shifts not aligned on bytes are not supported.");
15150     uint64_t Offset = Shift / 8;
15151     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15152     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
15153            "The size of the original loaded type is not a multiple of a"
15154            " byte.");
15155     // If Offset is bigger than TySizeInBytes, it means we are loading all
15156     // zeros. This should have been optimized before in the process.
15157     assert(TySizeInBytes > Offset &&
15158            "Invalid shift amount for given loaded size");
15159     if (IsBigEndian)
15160       Offset = TySizeInBytes - Offset - getLoadedSize();
15161     return Offset;
15162   }
15163 
15164   /// Generate the sequence of instructions to load the slice
15165   /// represented by this object and redirect the uses of this slice to
15166   /// this new sequence of instructions.
15167   /// \pre this->Inst && this->Origin are valid Instructions and this
15168   /// object passed the legal check: LoadedSlice::isLegal returned true.
15169   /// \return The last instruction of the sequence used to load the slice.
15170   SDValue loadSlice() const {
15171     assert(Inst && Origin && "Unable to replace a non-existing slice.");
15172     const SDValue &OldBaseAddr = Origin->getBasePtr();
15173     SDValue BaseAddr = OldBaseAddr;
15174     // Get the offset in that chunk of bytes w.r.t. the endianness.
15175     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
15176     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
15177     if (Offset) {
15178       // BaseAddr = BaseAddr + Offset.
15179       EVT ArithType = BaseAddr.getValueType();
15180       SDLoc DL(Origin);
15181       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
15182                               DAG->getConstant(Offset, DL, ArithType));
15183     }
15184 
15185     // Create the type of the loaded slice according to its size.
15186     EVT SliceType = getLoadedType();
15187 
15188     // Create the load for the slice.
15189     SDValue LastInst =
15190         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
15191                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
15192                      Origin->getMemOperand()->getFlags());
15193     // If the final type is not the same as the loaded type, this means that
15194     // we have to pad with zero. Create a zero extend for that.
15195     EVT FinalType = Inst->getValueType(0);
15196     if (SliceType != FinalType)
15197       LastInst =
15198           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
15199     return LastInst;
15200   }
15201 
15202   /// Check if this slice can be merged with an expensive cross register
15203   /// bank copy. E.g.,
15204   /// i = load i32
15205   /// f = bitcast i32 i to float
15206   bool canMergeExpensiveCrossRegisterBankCopy() const {
15207     if (!Inst || !Inst->hasOneUse())
15208       return false;
15209     SDNode *Use = *Inst->use_begin();
15210     if (Use->getOpcode() != ISD::BITCAST)
15211       return false;
15212     assert(DAG && "Missing context");
15213     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15214     EVT ResVT = Use->getValueType(0);
15215     const TargetRegisterClass *ResRC =
15216         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
15217     const TargetRegisterClass *ArgRC =
15218         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
15219                            Use->getOperand(0)->isDivergent());
15220     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
15221       return false;
15222 
15223     // At this point, we know that we perform a cross-register-bank copy.
15224     // Check if it is expensive.
15225     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
15226     // Assume bitcasts are cheap, unless both register classes do not
15227     // explicitly share a common sub class.
15228     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
15229       return false;
15230 
15231     // Check if it will be merged with the load.
15232     // 1. Check the alignment constraint.
15233     Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
15234         ResVT.getTypeForEVT(*DAG->getContext()));
15235 
15236     if (RequiredAlignment > getAlign())
15237       return false;
15238 
15239     // 2. Check that the load is a legal operation for that type.
15240     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
15241       return false;
15242 
15243     // 3. Check that we do not have a zext in the way.
15244     if (Inst->getValueType(0) != getLoadedType())
15245       return false;
15246 
15247     return true;
15248   }
15249 };
15250 
15251 } // end anonymous namespace
15252 
15253 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
15254 /// \p UsedBits looks like 0..0 1..1 0..0.
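      /// For example, 0b00111100 is dense, while 0b00110100 is not.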
15255 static bool areUsedBitsDense(const APInt &UsedBits) {
15256   // If all the bits are one, this is dense!
15257   if (UsedBits.isAllOnesValue())
15258     return true;
15259 
15260   // Get rid of the unused bits on the right.
15261   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
15262   // Get rid of the unused bits on the left.
15263   if (NarrowedUsedBits.countLeadingZeros())
15264     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
15265   // Check that the chunk of bits is completely used.
15266   return NarrowedUsedBits.isAllOnesValue();
15267 }
15268 
15269 /// Check whether or not \p First and \p Second are next to each other
15270 /// in memory. This means that there is no hole between the bits loaded
15271 /// by \p First and the bits loaded by \p Second.
15272 static bool areSlicesNextToEachOther(const LoadedSlice &First,
15273                                      const LoadedSlice &Second) {
15274   assert(First.Origin == Second.Origin && First.Origin &&
15275          "Unable to match different memory origins.");
15276   APInt UsedBits = First.getUsedBits();
15277   assert((UsedBits & Second.getUsedBits()) == 0 &&
15278          "Slices are not supposed to overlap.");
15279   UsedBits |= Second.getUsedBits();
15280   return areUsedBitsDense(UsedBits);
15281 }
15282 
15283 /// Adjust the \p GlobalLSCost according to the target
15284 /// pairing capabilities and the layout of the slices.
15285 /// \pre \p GlobalLSCost should account for at least as many loads as
15286 /// there are in the slices in \p LoadedSlices.
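      /// For example (illustrative), two same-type slices that are adjacent in
      /// memory and meet the target's paired-load alignment requirement are
      /// costed as a single load.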
15287 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15288                                  LoadedSlice::Cost &GlobalLSCost) {
15289   unsigned NumberOfSlices = LoadedSlices.size();
15290   // If there are fewer than 2 elements, no pairing is possible.
15291   if (NumberOfSlices < 2)
15292     return;
15293 
15294   // Sort the slices so that elements that are likely to be next to each
15295   // other in memory are next to each other in the list.
15296   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
15297     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
15298     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
15299   });
15300   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
15301   // First (resp. Second) is the first (resp. second) potential candidate
15302   // to be placed in a paired load.
15303   const LoadedSlice *First = nullptr;
15304   const LoadedSlice *Second = nullptr;
15305   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
15306                 // Set the beginning of the pair.
15307                                                            First = Second) {
15308     Second = &LoadedSlices[CurrSlice];
15309 
15310     // If First is NULL, it means we start a new pair.
15311     // Get to the next slice.
15312     if (!First)
15313       continue;
15314 
15315     EVT LoadedType = First->getLoadedType();
15316 
15317     // If the types of the slices are different, we cannot pair them.
15318     if (LoadedType != Second->getLoadedType())
15319       continue;
15320 
15321     // Check if the target supplies paired loads for this type.
15322     Align RequiredAlignment;
15323     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
15324       // Move to the next pair; this type is hopeless.
15325       Second = nullptr;
15326       continue;
15327     }
15328     // Check if we meet the alignment requirement.
15329     if (First->getAlign() < RequiredAlignment)
15330       continue;
15331 
15332     // Check that both loads are next to each other in memory.
15333     if (!areSlicesNextToEachOther(*First, *Second))
15334       continue;
15335 
15336     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
15337     --GlobalLSCost.Loads;
15338     // Move to the next pair.
15339     Second = nullptr;
15340   }
15341 }
15342 
15343 /// Check the profitability of all involved LoadedSlice.
15344 /// Currently, it is considered profitable if there are exactly two
15345 /// involved slices (1) which are (2) next to each other in memory, and
15346 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
15347 ///
15348 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
15349 /// the elements themselves.
15350 ///
15351 /// FIXME: When the cost model will be mature enough, we can relax
15352 /// constraints (1) and (2).
15353 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15354                                 const APInt &UsedBits, bool ForCodeSize) {
15355   unsigned NumberOfSlices = LoadedSlices.size();
15356   if (StressLoadSlicing)
15357     return NumberOfSlices > 1;
15358 
15359   // Check (1).
15360   if (NumberOfSlices != 2)
15361     return false;
15362 
15363   // Check (2).
15364   if (!areUsedBitsDense(UsedBits))
15365     return false;
15366 
15367   // Check (3).
15368   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
15369   // The original code has one big load.
15370   OrigCost.Loads = 1;
15371   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
15372     const LoadedSlice &LS = LoadedSlices[CurrSlice];
15373     // Accumulate the cost of all the slices.
15374     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
15375     GlobalSlicingCost += SliceCost;
15376 
15377     // Account as cost in the original configuration the gain obtained
15378     // with the current slices.
15379     OrigCost.addSliceGain(LS);
15380   }
15381 
15382   // If the target supports paired load, adjust the cost accordingly.
15383   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
15384   return OrigCost > GlobalSlicingCost;
15385 }
15386 
15387 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
15388 /// operations, split it in the various pieces being extracted.
15389 ///
15390 /// This sort of thing is introduced by SROA.
15391 /// This slicing takes care not to insert overlapping loads.
15392 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
15393 bool DAGCombiner::SliceUpLoad(SDNode *N) {
15394   if (Level < AfterLegalizeDAG)
15395     return false;
15396 
15397   LoadSDNode *LD = cast<LoadSDNode>(N);
15398   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
15399       !LD->getValueType(0).isInteger())
15400     return false;
15401 
15402   // The algorithm to split up a load of a scalable vector into individual
15403   // elements currently requires knowing the length of the loaded type,
15404   // so will need adjusting to work on scalable vectors.
15405   if (LD->getValueType(0).isScalableVector())
15406     return false;
15407 
15408   // Keep track of already used bits to detect overlapping values.
15409   // In that case, we will just abort the transformation.
15410   APInt UsedBits(LD->getValueSizeInBits(0), 0);
15411 
15412   SmallVector<LoadedSlice, 4> LoadedSlices;
15413 
15414   // Check if this load is used as several smaller chunks of bits.
15415   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
15416   // of computation for each trunc.
15417   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
15418        UI != UIEnd; ++UI) {
15419     // Skip the uses of the chain.
15420     if (UI.getUse().getResNo() != 0)
15421       continue;
15422 
15423     SDNode *User = *UI;
15424     unsigned Shift = 0;
15425 
15426     // Check if this is a trunc(lshr).
15427     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
15428         isa<ConstantSDNode>(User->getOperand(1))) {
15429       Shift = User->getConstantOperandVal(1);
15430       User = *User->use_begin();
15431     }
15432 
15433     // At this point, User is a Truncate iff we encountered trunc or
15434     // trunc(lshr).
15435     if (User->getOpcode() != ISD::TRUNCATE)
15436       return false;
15437 
15438     // The width of the type must be a power of 2 and at least 8 bits;
15439     // otherwise the load cannot be represented in LLVM IR.
15440     // Moreover, if we shifted by an amount that is not a multiple of 8
15441     // bits, the slice would span several bytes. We do not support that.
15442     unsigned Width = User->getValueSizeInBits(0);
15443     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
15444       return false;
15445 
15446     // Build the slice for this chain of computations.
15447     LoadedSlice LS(User, LD, Shift, &DAG);
15448     APInt CurrentUsedBits = LS.getUsedBits();
15449 
15450     // Check if this slice overlaps with another.
15451     if ((CurrentUsedBits & UsedBits) != 0)
15452       return false;
15453     // Update the bits used globally.
15454     UsedBits |= CurrentUsedBits;
15455 
15456     // Check if the new slice would be legal.
15457     if (!LS.isLegal())
15458       return false;
15459 
15460     // Record the slice.
15461     LoadedSlices.push_back(LS);
15462   }
15463 
15464   // Abort slicing if it does not seem to be profitable.
15465   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
15466     return false;
15467 
15468   ++SlicedLoads;
15469 
15470   // Rewrite each chain to use an independent load.
15471   // By construction, each chain can be represented by a unique load.
15472 
15473   // Prepare the argument for the new token factor for all the slices.
15474   SmallVector<SDValue, 8> ArgChains;
15475   for (SmallVectorImpl<LoadedSlice>::const_iterator
15476            LSIt = LoadedSlices.begin(),
15477            LSItEnd = LoadedSlices.end();
15478        LSIt != LSItEnd; ++LSIt) {
15479     SDValue SliceInst = LSIt->loadSlice();
15480     CombineTo(LSIt->Inst, SliceInst, true);
15481     if (SliceInst.getOpcode() != ISD::LOAD)
15482       SliceInst = SliceInst.getOperand(0);
15483     assert(SliceInst->getOpcode() == ISD::LOAD &&
15484            "It takes more than a zext to get to the loaded slice!!");
15485     ArgChains.push_back(SliceInst.getValue(1));
15486   }
15487 
15488   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
15489                               ArgChains);
15490   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15491   AddToWorklist(Chain.getNode());
15492   return true;
15493 }
15494 
15495 /// Check to see if V is (and load (ptr), imm), where the load is having
15496 /// specific bytes cleared out.  If so, return the byte size being masked out
15497 /// and the shift amount.
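///
/// For instance, with V = (and (load p), 0xFFFF00FF) on an i32 value,
/// byte 1 is being cleared, so this returns {1, 1}: one byte masked out,
/// at a shift of one byte.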
15498 static std::pair<unsigned, unsigned>
15499 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
15500   std::pair<unsigned, unsigned> Result(0, 0);
15501 
15502   // Check for the structure we're looking for.
15503   if (V->getOpcode() != ISD::AND ||
15504       !isa<ConstantSDNode>(V->getOperand(1)) ||
15505       !ISD::isNormalLoad(V->getOperand(0).getNode()))
15506     return Result;
15507 
15508   // Check the chain and pointer.
15509   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
15510   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
15511 
15512   // This only handles simple types.
15513   if (V.getValueType() != MVT::i16 &&
15514       V.getValueType() != MVT::i32 &&
15515       V.getValueType() != MVT::i64)
15516     return Result;
15517 
15518   // Check the constant mask.  Invert it so that the bits being masked out are
15519   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
15520   // follow the sign bit for uniformity.
15521   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
15522   unsigned NotMaskLZ = countLeadingZeros(NotMask);
15523   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
15524   unsigned NotMaskTZ = countTrailingZeros(NotMask);
15525   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
15526   if (NotMaskLZ == 64) return Result;  // All zero mask.
15527 
15528   // See if we have a continuous run of bits.  If so, we have 0*1+0*
15529   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
15530     return Result;
15531 
15532   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
15533   if (V.getValueType() != MVT::i64 && NotMaskLZ)
15534     NotMaskLZ -= 64-V.getValueSizeInBits();
15535 
15536   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
15537   switch (MaskedBytes) {
15538   case 1:
15539   case 2:
15540   case 4: break;
15541   default: return Result; // All one mask, or 5-byte mask.
15542   }
15543 
15544   // Verify that the masked run starts at a multiple of its width so that
15545   // the access is aligned the same as the access width.
15546   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
15547 
15548   // For narrowing to be valid, it must be the case that the load is the
15549   // memory operation immediately preceding the store.
15550   if (LD == Chain.getNode())
15551     ; // ok.
15552   else if (Chain->getOpcode() == ISD::TokenFactor &&
15553            SDValue(LD, 1).hasOneUse()) {
15554     // LD has only 1 chain use so there are no indirect dependencies.
15555     if (!LD->isOperandOf(Chain.getNode()))
15556       return Result;
15557   } else
15558     return Result; // Fail.
15559 
15560   Result.first = MaskedBytes;
15561   Result.second = NotMaskTZ/8;
15562   return Result;
15563 }
15564 
15565 /// Check to see if IVal is something that provides a value as specified by
15566 /// MaskInfo. If so, replace the specified store with a narrower store of
15567 /// truncated IVal.
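///
/// For example, with MaskInfo = {2, 1} on an i32 store, IVal is shifted
/// right by 8 bits, truncated to i16, and stored at byte offset 1 of the
/// original address (little endian), provided i16 is legal and the bits of
/// IVal outside the masked-in window are known to be zero.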
15568 static SDValue
15569 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
15570                                 SDValue IVal, StoreSDNode *St,
15571                                 DAGCombiner *DC) {
15572   unsigned NumBytes = MaskInfo.first;
15573   unsigned ByteShift = MaskInfo.second;
15574   SelectionDAG &DAG = DC->getDAG();
15575 
15576   // Check to see if IVal is all zeros in the part being masked in by the 'or'
15577   // that uses this.  If not, this is not a replacement.
15578   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
15579                                   ByteShift*8, (ByteShift+NumBytes)*8);
15580   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
15581 
15582   // Check that it is legal on the target to do this.  It is legal if the new
15583   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
15584   // legalization (and the target doesn't explicitly think this is a bad idea).
15585   MVT VT = MVT::getIntegerVT(NumBytes * 8);
15586   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15587   if (!DC->isTypeLegal(VT))
15588     return SDValue();
15589   if (St->getMemOperand() &&
15590       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15591                               *St->getMemOperand()))
15592     return SDValue();
15593 
15594   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
15595   // shifted by ByteShift and truncated down to NumBytes.
15596   if (ByteShift) {
15597     SDLoc DL(IVal);
15598     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
15599                        DAG.getConstant(ByteShift*8, DL,
15600                                     DC->getShiftAmountTy(IVal.getValueType())));
15601   }
15602 
15603   // Figure out the offset for the store and the alignment of the access.
15604   unsigned StOffset;
15605   unsigned NewAlign = St->getAlignment();
15606 
15607   if (DAG.getDataLayout().isLittleEndian())
15608     StOffset = ByteShift;
15609   else
15610     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
15611 
15612   SDValue Ptr = St->getBasePtr();
15613   if (StOffset) {
15614     SDLoc DL(IVal);
15615     Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
15616     NewAlign = MinAlign(NewAlign, StOffset);
15617   }
15618 
15619   // Truncate down to the new size.
15620   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
15621 
15622   ++OpsNarrowed;
15623   return DAG
15624       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
15625                 St->getPointerInfo().getWithOffset(StOffset), NewAlign);
15626 }
15627 
15628 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
15629 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
15630 /// narrowing the load and store if it would end up being a win for performance
15631 /// or code size.
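///
/// For example, "store (or (load p), 0xFF00), p" only touches byte 1, so
/// on a little-endian target it may become an i8 load/or/store at p+1,
/// assuming the target reports the i8 operation as legal and the narrowing
/// as profitable.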
15632 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
15633   StoreSDNode *ST  = cast<StoreSDNode>(N);
15634   if (!ST->isSimple())
15635     return SDValue();
15636 
15637   SDValue Chain = ST->getChain();
15638   SDValue Value = ST->getValue();
15639   SDValue Ptr   = ST->getBasePtr();
15640   EVT VT = Value.getValueType();
15641 
15642   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
15643     return SDValue();
15644 
15645   unsigned Opc = Value.getOpcode();
15646 
15647   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
15648   // is a byte mask indicating a consecutive number of bytes, check to see if
15649   // Y is known to provide just those bytes.  If so, we try to replace the
15650   // load + replace + store sequence with a single (narrower) store, which makes
15651   // the load dead.
15652   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
15653     std::pair<unsigned, unsigned> MaskedLoad;
15654     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
15655     if (MaskedLoad.first)
15656       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15657                                                   Value.getOperand(1), ST, this))
15658         return NewST;
15659 
15660     // Or is commutative, so try swapping X and Y.
15661     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
15662     if (MaskedLoad.first)
15663       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15664                                                   Value.getOperand(0), ST, this))
15665         return NewST;
15666   }
15667 
15668   if (!EnableReduceLoadOpStoreWidth)
15669     return SDValue();
15670 
15671   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
15672       Value.getOperand(1).getOpcode() != ISD::Constant)
15673     return SDValue();
15674 
15675   SDValue N0 = Value.getOperand(0);
15676   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15677       Chain == SDValue(N0.getNode(), 1)) {
15678     LoadSDNode *LD = cast<LoadSDNode>(N0);
15679     if (LD->getBasePtr() != Ptr ||
15680         LD->getPointerInfo().getAddrSpace() !=
15681         ST->getPointerInfo().getAddrSpace())
15682       return SDValue();
15683 
15684     // Find the type to which to narrow the load / op / store.
15685     SDValue N1 = Value.getOperand(1);
15686     unsigned BitWidth = N1.getValueSizeInBits();
15687     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
15688     if (Opc == ISD::AND)
15689       Imm ^= APInt::getAllOnesValue(BitWidth);
15690     if (Imm == 0 || Imm.isAllOnesValue())
15691       return SDValue();
15692     unsigned ShAmt = Imm.countTrailingZeros();
15693     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
15694     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
15695     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15696     // The narrowing should be profitable, the load/store operation should be
15697     // legal (or custom) and the store size should be equal to the NewVT width.
15698     while (NewBW < BitWidth &&
15699            (NewVT.getStoreSizeInBits() != NewBW ||
15700             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
15701             !TLI.isNarrowingProfitable(VT, NewVT))) {
15702       NewBW = NextPowerOf2(NewBW);
15703       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15704     }
15705     if (NewBW >= BitWidth)
15706       return SDValue();
15707 
15708     // If the lowest changed bit does not start at a NewBW-bit boundary,
15709     // start at the previous boundary.
15710     if (ShAmt % NewBW)
15711       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
15712     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
15713                                    std::min(BitWidth, ShAmt + NewBW));
15714     if ((Imm & Mask) == Imm) {
15715       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
15716       if (Opc == ISD::AND)
15717         NewImm ^= APInt::getAllOnesValue(NewBW);
15718       uint64_t PtrOff = ShAmt / 8;
15719       // For big endian targets, we need to adjust the offset to the pointer to
15720       // load the correct bytes.
15721       if (DAG.getDataLayout().isBigEndian())
15722         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
15723 
15724       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
15725       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
15726       if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
15727         return SDValue();
15728 
15729       SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
15730       SDValue NewLD =
15731           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
15732                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
15733                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
15734       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
15735                                    DAG.getConstant(NewImm, SDLoc(Value),
15736                                                    NewVT));
15737       SDValue NewST =
15738           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
15739                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
15740 
15741       AddToWorklist(NewPtr.getNode());
15742       AddToWorklist(NewLD.getNode());
15743       AddToWorklist(NewVal.getNode());
15744       WorklistRemover DeadNodes(*this);
15745       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
15746       ++OpsNarrowed;
15747       return NewST;
15748     }
15749   }
15750 
15751   return SDValue();
15752 }
15753 
15754 /// For a given floating point load / store pair, if the load value isn't used
15755 /// by any other operations, then consider transforming the pair to integer
15756 /// load / store operations if the target deems the transformation profitable.
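///
/// For example, "store (f32 load p), q" may become "store (i32 load p), q"
/// when the target reports the integer load/store as legal and desirable
/// and both accesses meet the i32 ABI alignment.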
15757 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15758   StoreSDNode *ST  = cast<StoreSDNode>(N);
15759   SDValue Value = ST->getValue();
15760   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15761       Value.hasOneUse()) {
15762     LoadSDNode *LD = cast<LoadSDNode>(Value);
15763     EVT VT = LD->getMemoryVT();
15764     if (!VT.isFloatingPoint() ||
15765         VT != ST->getMemoryVT() ||
15766         LD->isNonTemporal() ||
15767         ST->isNonTemporal() ||
15768         LD->getPointerInfo().getAddrSpace() != 0 ||
15769         ST->getPointerInfo().getAddrSpace() != 0)
15770       return SDValue();
15771 
15772     TypeSize VTSize = VT.getSizeInBits();
15773 
15774     // We don't know the size of scalable types at compile time so we cannot
15775     // create an integer of the equivalent size.
15776     if (VTSize.isScalable())
15777       return SDValue();
15778 
15779     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
15780     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15781         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15782         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15783         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
15784       return SDValue();
15785 
15786     Align LDAlign = LD->getAlign();
15787     Align STAlign = ST->getAlign();
15788     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15789     Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
15790     if (LDAlign < ABIAlign || STAlign < ABIAlign)
15791       return SDValue();
15792 
15793     SDValue NewLD =
15794         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15795                     LD->getPointerInfo(), LDAlign);
15796 
15797     SDValue NewST =
15798         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15799                      ST->getPointerInfo(), STAlign);
15800 
15801     AddToWorklist(NewLD.getNode());
15802     AddToWorklist(NewST.getNode());
15803     WorklistRemover DeadNodes(*this);
15804     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15805     ++LdStFP2Int;
15806     return NewST;
15807   }
15808 
15809   return SDValue();
15810 }
15811 
15812 // This is a helper function for visitMUL to check the profitability
15813 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15814 // MulNode is the original multiply, AddNode is (add x, c1),
15815 // and ConstNode is c2.
15816 //
15817 // If the (add x, c1) has multiple uses, we could increase
15818 // the number of adds if we make this transformation.
15819 // It would only be worth doing this if we can remove a
15820 // multiply in the process. Check for that here.
15821 // To illustrate:
15822 //     (A + c1) * c3
15823 //     (A + c2) * c3
15824 // We're checking for cases where we have common "c3 * A" expressions.
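// For example, given (A + 1) * 8 and (A + 2) * 8, folding both to
// (A * 8) + 8 and (A * 8) + 16 exposes a common "A * 8" value, so the
// extra adds are paid for by the multiply that can now be shared.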
15825 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15826                                               SDValue &AddNode,
15827                                               SDValue &ConstNode) {
15828   APInt Val;
15829 
15830   // If the add only has one use, this would be OK to do.
15831   if (AddNode.getNode()->hasOneUse())
15832     return true;
15833 
15834   // Walk all the users of the constant with which we're multiplying.
15835   for (SDNode *Use : ConstNode->uses()) {
15836     if (Use == MulNode) // This use is the one we're on right now. Skip it.
15837       continue;
15838 
15839     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15840       SDNode *OtherOp;
15841       SDNode *MulVar = AddNode.getOperand(0).getNode();
15842 
15843       // OtherOp is what we're multiplying against the constant.
15844       if (Use->getOperand(0) == ConstNode)
15845         OtherOp = Use->getOperand(1).getNode();
15846       else
15847         OtherOp = Use->getOperand(0).getNode();
15848 
15849       // Check to see if multiply is with the same operand of our "add".
15850       //
15851       //     ConstNode  = CONST
15852       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
15853       //     ...
15854       //     AddNode  = (A + c1)  <-- MulVar is A.
15855       //         = AddNode * ConstNode   <-- current visiting instruction.
15856       //
15857       // If we make this transformation, we will have a common
15858       // multiply (ConstNode * A) that we can save.
15859       if (OtherOp == MulVar)
15860         return true;
15861 
15862       // Now check to see if a future expansion will give us a common
15863       // multiply.
15864       //
15865       //     ConstNode  = CONST
15866       //     AddNode    = (A + c1)
15867       //     ...   = AddNode * ConstNode <-- current visiting instruction.
15868       //     ...
15869       //     OtherOp = (A + c2)
15870       //     Use     = OtherOp * ConstNode <-- visiting Use.
15871       //
15872       // If we make this transformation, we will have a common
15873       // multiply (CONST * A) after we also do the same transformation
15874       // to the "Use" instruction.
15875       if (OtherOp->getOpcode() == ISD::ADD &&
15876           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15877           OtherOp->getOperand(0).getNode() == MulVar)
15878         return true;
15879     }
15880   }
15881 
15882   // Didn't find a case where this would be profitable.
15883   return false;
15884 }
15885 
15886 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15887                                          unsigned NumStores) {
15888   SmallVector<SDValue, 8> Chains;
15889   SmallPtrSet<const SDNode *, 8> Visited;
15890   SDLoc StoreDL(StoreNodes[0].MemNode);
15891 
15892   for (unsigned i = 0; i < NumStores; ++i) {
15893     Visited.insert(StoreNodes[i].MemNode);
15894   }
15895 
15896   // Don't include chains that are themselves among the merged stores, or
15897   for (unsigned i = 0; i < NumStores; ++i) {
15898     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15899       Chains.push_back(StoreNodes[i].MemNode->getChain());
15900   }
15901 
15902   assert(Chains.size() > 0 && "Merged stores should have at least one chain");
15903   return DAG.getTokenFactor(StoreDL, Chains);
15904 }
15905 
15906 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
15907     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
15908     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
15909   // Make sure we have something to merge.
15910   if (NumStores < 2)
15911     return false;
15912 
15913   // The latest Node in the DAG.
15914   SDLoc DL(StoreNodes[0].MemNode);
15915 
15916   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
15917   unsigned SizeInBits = NumStores * ElementSizeBits;
15918   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15919 
15920   EVT StoreTy;
15921   if (UseVector) {
15922     unsigned Elts = NumStores * NumMemElts;
15923     // Get the type for the merged vector store.
15924     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15925   } else
15926     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
15927 
15928   SDValue StoredVal;
15929   if (UseVector) {
15930     if (IsConstantSrc) {
15931       SmallVector<SDValue, 8> BuildVector;
15932       for (unsigned I = 0; I != NumStores; ++I) {
15933         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
15934         SDValue Val = St->getValue();
15935         // If constant is of the wrong type, convert it now.
15936         if (MemVT != Val.getValueType()) {
15937           Val = peekThroughBitcasts(Val);
15938           // Deal with constants of wrong size.
15939           if (ElementSizeBits != Val.getValueSizeInBits()) {
15940             EVT IntMemVT =
15941                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
15942             if (isa<ConstantFPSDNode>(Val)) {
15943               // Not clear how to truncate FP values.
15944               return false;
15945             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15946               Val = DAG.getConstant(C->getAPIntValue()
15947                                         .zextOrTrunc(Val.getValueSizeInBits())
15948                                         .zextOrTrunc(ElementSizeBits),
15949                                     SDLoc(C), IntMemVT);
15950           }
15951           // Bitcast so the correctly sized value has the correct type.
15952           Val = DAG.getBitcast(MemVT, Val);
15953         }
15954         BuildVector.push_back(Val);
15955       }
15956       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15957                                                : ISD::BUILD_VECTOR,
15958                               DL, StoreTy, BuildVector);
15959     } else {
15960       SmallVector<SDValue, 8> Ops;
15961       for (unsigned i = 0; i < NumStores; ++i) {
15962         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15963         SDValue Val = peekThroughBitcasts(St->getValue());
15964         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15965         // type MemVT. If the underlying value is not the correct
15966         // type, but it is an extraction of an appropriate vector we
15967         // can recast Val to be of the correct type. This may require
15968         // converting between EXTRACT_VECTOR_ELT and
15969         // EXTRACT_SUBVECTOR.
15970         if ((MemVT != Val.getValueType()) &&
15971             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15972              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
15973           EVT MemVTScalarTy = MemVT.getScalarType();
15974           // We may need to add a bitcast here to get types to line up.
15975           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15976             Val = DAG.getBitcast(MemVT, Val);
15977           } else {
15978             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15979                                             : ISD::EXTRACT_VECTOR_ELT;
15980             SDValue Vec = Val.getOperand(0);
15981             SDValue Idx = Val.getOperand(1);
15982             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15983           }
15984         }
15985         Ops.push_back(Val);
15986       }
15987 
15988       // Build the extracted vector elements back into a vector.
15989       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15990                                                : ISD::BUILD_VECTOR,
15991                               DL, StoreTy, Ops);
15992     }
15993   } else {
15994     // We should always use a vector store when merging extracted vector
15995     // elements, so this path implies a store of constants.
15996     assert(IsConstantSrc && "Merged vector elements should use vector store");
15997 
15998     APInt StoreInt(SizeInBits, 0);
15999 
16000     // Construct a single integer constant which is made of the smaller
16001     // constant inputs.
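    // For example, merging i16 stores of 0x1234 (offset 0) and 0x5678
    // (offset 2) on a little-endian target produces the i32 constant
    // 0x56781234, whose memory image matches the two original stores.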
16002     bool IsLE = DAG.getDataLayout().isLittleEndian();
16003     for (unsigned i = 0; i < NumStores; ++i) {
16004       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
16005       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
16006 
16007       SDValue Val = St->getValue();
16008       Val = peekThroughBitcasts(Val);
16009       StoreInt <<= ElementSizeBits;
16010       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
16011         StoreInt |= C->getAPIntValue()
16012                         .zextOrTrunc(ElementSizeBits)
16013                         .zextOrTrunc(SizeInBits);
16014       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
16015         StoreInt |= C->getValueAPF()
16016                         .bitcastToAPInt()
16017                         .zextOrTrunc(ElementSizeBits)
16018                         .zextOrTrunc(SizeInBits);
16019         // If fp truncation is necessary give up for now.
16020         if (MemVT.getSizeInBits() != ElementSizeBits)
16021           return false;
16022       } else {
16023         llvm_unreachable("Invalid constant element type");
16024       }
16025     }
16026 
16027     // Create the new Load and Store operations.
16028     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
16029   }
16030 
16031   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16032   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
16033 
16034   // Make sure we use a trunc store if it's necessary to be legal.
16035   SDValue NewStore;
16036   if (!UseTrunc) {
16037     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
16038                             FirstInChain->getPointerInfo(),
16039                             FirstInChain->getAlignment());
16040   } else { // Must be realized as a trunc store
16041     EVT LegalizedStoredValTy =
16042         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
16043     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
16044     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
16045     SDValue ExtendedStoreVal =
16046         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
16047                         LegalizedStoredValTy);
16048     NewStore = DAG.getTruncStore(
16049         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
16050         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
16051         FirstInChain->getAlignment(),
16052         FirstInChain->getMemOperand()->getFlags());
16053   }
16054 
16055   // Replace all merged stores with the new store.
16056   for (unsigned i = 0; i < NumStores; ++i)
16057     CombineTo(StoreNodes[i].MemNode, NewStore);
16058 
16059   AddToWorklist(NewChain.getNode());
16060   return true;
16061 }
16062 
16063 void DAGCombiner::getStoreMergeCandidates(
16064     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
16065     SDNode *&RootNode) {
16066   // This holds the base pointer, index, and the offset in bytes from the base
16067   // pointer.
16068   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
16069   EVT MemVT = St->getMemoryVT();
16070 
16071   SDValue Val = peekThroughBitcasts(St->getValue());
16072   // We must have a base and an offset.
16073   if (!BasePtr.getBase().getNode())
16074     return;
16075 
16076   // Do not handle stores to undef base pointers.
16077   if (BasePtr.getBase().isUndef())
16078     return;
16079 
16080   StoreSource StoreSrc = getStoreSource(Val);
16081   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
16082   BaseIndexOffset LBasePtr;
16083   // Match on loadbaseptr if relevant.
16084   EVT LoadVT;
16085   if (StoreSrc == StoreSource::Load) {
16086     auto *Ld = cast<LoadSDNode>(Val);
16087     LBasePtr = BaseIndexOffset::match(Ld, DAG);
16088     LoadVT = Ld->getMemoryVT();
16089     // Load and store should be the same type.
16090     if (MemVT != LoadVT)
16091       return;
16092     // Loads must only have one use.
16093     if (!Ld->hasNUsesOfValue(1, 0))
16094       return;
16095     // The memory operands must not be volatile/indexed/atomic.
16096     // TODO: May be able to relax for unordered atomics (see D66309)
16097     if (!Ld->isSimple() || Ld->isIndexed())
16098       return;
16099   }
16100   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
16101                             int64_t &Offset) -> bool {
16102     // The memory operands must not be volatile/indexed/atomic.
16103     // TODO: May be able to relax for unordered atomics (see D66309)
16104     if (!Other->isSimple() || Other->isIndexed())
16105       return false;
16106     // Don't mix temporal stores with non-temporal stores.
16107     if (St->isNonTemporal() != Other->isNonTemporal())
16108       return false;
16109     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
16110     // Allow merging constants of different types as integers.
16111     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
16112                                            : Other->getMemoryVT() != MemVT;
16113     if (StoreSrc == StoreSource::Load) {
16114       if (NoTypeMatch)
16115         return false;
16116       // The Load's Base Ptr must also match
16117       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
16118         BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
16119         if (LoadVT != OtherLd->getMemoryVT())
16120           return false;
16121         // Loads must only have one use.
16122         if (!OtherLd->hasNUsesOfValue(1, 0))
16123           return false;
16124         // The memory operands must not be volatile/indexed/atomic.
16125         // TODO: May be able to relax for unordered atomics (see D66309)
16126         if (!OtherLd->isSimple() ||
16127             OtherLd->isIndexed())
16128           return false;
16129         // Don't mix temporal loads with non-temporal loads.
16130         if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
16131           return false;
16132         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
16133           return false;
16134       } else
16135         return false;
16136     }
16137     if (StoreSrc == StoreSource::Constant) {
16138       if (NoTypeMatch)
16139         return false;
16140       if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
16141         return false;
16142     }
16143     if (StoreSrc == StoreSource::Extract) {
16144       // Do not merge truncated stores here.
16145       if (Other->isTruncatingStore())
16146         return false;
16147       if (!MemVT.bitsEq(OtherBC.getValueType()))
16148         return false;
16149       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
16150           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16151         return false;
16152     }
16153     Ptr = BaseIndexOffset::match(Other, DAG);
16154     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
16155   };
16156 
16157   // Check if the pair of StoreNode and RootNode has already bailed out of
16158   // the dependence check more times than the limit allows.
16159   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
16160                                         SDNode *RootNode) -> bool {
16161     auto RootCount = StoreRootCountMap.find(StoreNode);
16162     if (RootCount != StoreRootCountMap.end() &&
16163         RootCount->second.first == RootNode &&
16164         RootCount->second.second > StoreMergeDependenceLimit)
16165       return true;
16166     return false;
16167   };
16168 
16169   // We are looking for a root node which is an ancestor to all mergeable
16170   // stores. We search up through a load, to our root and then down
16171   // through all children. For instance we will find Store{1,2,3} if
16172   // St is Store1, Store2, or Store3 where the root is not a load,
16173   // which is always true for nonvolatile ops. TODO: Expand
16174   // the search to find all valid candidates through multiple layers of loads.
16175   //
16176   // Root
16177   // |-------|-------|
16178   // Load    Load    Store3
16179   // |       |
16180   // Store1   Store2
16181   //
16182   // FIXME: We should be able to climb and
16183   // descend TokenFactors to find candidates as well.
16184 
16185   RootNode = St->getChain().getNode();
16186 
16187   unsigned NumNodesExplored = 0;
16188   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
16189     RootNode = Ldn->getChain().getNode();
16190     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16191          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
16192       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
16193         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
16194           if (I2.getOperandNo() == 0)
16195             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
16196               BaseIndexOffset Ptr;
16197               int64_t PtrDiff;
16198               if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
16199                   !OverLimitInDependenceCheck(OtherST, RootNode))
16200                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
16201             }
16202   } else
16203     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16204          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
16205       if (I.getOperandNo() == 0)
16206         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
16207           BaseIndexOffset Ptr;
16208           int64_t PtrDiff;
16209           if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
16210               !OverLimitInDependenceCheck(OtherST, RootNode))
16211             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
16212         }
16213 }
16214 
16215 // We need to check that merging these stores does not cause a loop in
16216 // the DAG. Any store candidate may depend on another candidate
16217 // indirectly through its operand (we already consider dependencies
16218 // through the chain). Check in parallel by searching up from
16219 // non-chain operands of candidates.
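// For example, a candidate store's value operand may be a load whose
// chain passes through another candidate store; merging the two would
// make the merged store a predecessor of itself.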
16220 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
16221     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
16222     SDNode *RootNode) {
16223   // FIXME: We should be able to truncate a full search of
16224   // predecessors by doing a BFS and keeping tabs on the originating
16225   // stores from which worklist nodes come, in a similar way to
16226   // TokenFactor simplification.
16227 
16228   SmallPtrSet<const SDNode *, 32> Visited;
16229   SmallVector<const SDNode *, 8> Worklist;
16230 
16231   // RootNode is a predecessor to all candidates so we need not search
16232   // past it. Add RootNode (peeking through TokenFactors). Do not count
16233   // these towards the size check.
16234 
16235   Worklist.push_back(RootNode);
16236   while (!Worklist.empty()) {
16237     auto N = Worklist.pop_back_val();
16238     if (!Visited.insert(N).second)
16239       continue; // Already present in Visited.
16240     if (N->getOpcode() == ISD::TokenFactor) {
16241       for (SDValue Op : N->ops())
16242         Worklist.push_back(Op.getNode());
16243     }
16244   }
16245 
16246   // Don't count pruning nodes towards max.
16247   unsigned int Max = 1024 + Visited.size();
16248   // Search Ops of store candidates.
16249   for (unsigned i = 0; i < NumStores; ++i) {
16250     SDNode *N = StoreNodes[i].MemNode;
16251     // Of the 4 Store Operands:
16252     //   * Chain (Op 0) -> We have already considered these
16253     //                    in candidate selection and can be
16254     //                    safely ignored
16255     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
16256     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
16257     //                       but aren't necessarily from the same base node, so
16258     //                       cycles are possible (e.g. via an indexed store).
16259     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
16260     //               non-indexed stores). Not constant on all targets (e.g. ARM)
16261     //               and so can participate in a cycle.
16262     for (unsigned j = 1; j < N->getNumOperands(); ++j)
16263       Worklist.push_back(N->getOperand(j).getNode());
16264   }
16265   // Search through DAG. We can stop early if we find a store node.
16266   for (unsigned i = 0; i < NumStores; ++i)
16267     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
16268                                      Max)) {
16269       // If the searching bail out, record the StoreNode and RootNode in the
16270       // StoreRootCountMap. If we have seen the pair many times over a limit,
16271       // we won't add the StoreNode into StoreNodes set again.
16272       if (Visited.size() >= Max) {
16273         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
16274         if (RootCount.first == RootNode)
16275           RootCount.second++;
16276         else
16277           RootCount = {RootNode, 1};
16278       }
16279       return false;
16280     }
16281   return true;
16282 }
16283 
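// For example, with store offsets {0, 4, 8, 20} and ElementSizeBytes == 4,
// the first three stores are consecutive, so this returns 3 and leaves the
// trailing store in StoreNodes for a later attempt.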
16284 unsigned
16285 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
16286                                   int64_t ElementSizeBytes) const {
16287   while (true) {
16288     // Find a store past the width of the first store.
16289     size_t StartIdx = 0;
16290     while ((StartIdx + 1 < StoreNodes.size()) &&
16291            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
16292               StoreNodes[StartIdx + 1].OffsetFromBase)
16293       ++StartIdx;
16294 
16295     // Bail if we don't have enough candidates to merge.
16296     if (StartIdx + 1 >= StoreNodes.size())
16297       return 0;
16298 
16299     // Trim stores that overlapped with the first store.
16300     if (StartIdx)
16301       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
16302 
16303     // Scan the memory operations on the chain and find the first
16304     // non-consecutive store memory address.
16305     unsigned NumConsecutiveStores = 1;
16306     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16307     // Check that the addresses are consecutive starting from the second
16308     // element in the list of stores.
16309     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
16310       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
16311       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16312         break;
16313       NumConsecutiveStores = i + 1;
16314     }
16315     if (NumConsecutiveStores > 1)
16316       return NumConsecutiveStores;
16317 
16318     // There are no consecutive stores at the start of the list.
16319     // Remove the first store and try again.
16320     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
16321   }
16322 }
16323 
16324 bool DAGCombiner::tryStoreMergeOfConstants(
16325     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
16326     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
16327   LLVMContext &Context = *DAG.getContext();
16328   const DataLayout &DL = DAG.getDataLayout();
16329   int64_t ElementSizeBytes = MemVT.getStoreSize();
16330   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16331   bool MadeChange = false;
16332 
16333   // Store the constants into memory as one consecutive store.
16334   while (NumConsecutiveStores >= 2) {
16335     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16336     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16337     unsigned FirstStoreAlign = FirstInChain->getAlignment();
16338     unsigned LastLegalType = 1;
16339     unsigned LastLegalVectorType = 1;
16340     bool LastIntegerTrunc = false;
16341     bool NonZero = false;
16342     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
16343     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16344       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
16345       SDValue StoredVal = ST->getValue();
16346       bool IsElementZero = false;
16347       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
16348         IsElementZero = C->isNullValue();
16349       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
16350         IsElementZero = C->getConstantFPValue()->isNullValue();
16351       if (IsElementZero) {
16352         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
16353           FirstZeroAfterNonZero = i;
16354       }
16355       NonZero |= !IsElementZero;
16356 
16357       // Find a legal type for the constant store.
16358       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16359       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16360       bool IsFast = false;
16361 
16362       // Break early when size is too large to be legal.
16363       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16364         break;
16365 
16366       if (TLI.isTypeLegal(StoreTy) &&
16367           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16368           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16369                                  *FirstInChain->getMemOperand(), &IsFast) &&
16370           IsFast) {
16371         LastIntegerTrunc = false;
16372         LastLegalType = i + 1;
16373         // Or check whether a truncstore is legal.
16374       } else if (TLI.getTypeAction(Context, StoreTy) ==
16375                  TargetLowering::TypePromoteInteger) {
16376         EVT LegalizedStoredValTy =
16377             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
16378         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16379             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16380             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16381                                    *FirstInChain->getMemOperand(), &IsFast) &&
16382             IsFast) {
16383           LastIntegerTrunc = true;
16384           LastLegalType = i + 1;
16385         }
16386       }
16387 
16388       // We only use vectors if the constant is known to be zero or the
16389       // target allows it and the function is not marked with the
16390       // noimplicitfloat attribute.
16391       if ((!NonZero ||
16392            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
16393           AllowVectors) {
16394         // Find a legal type for the vector store.
16395         unsigned Elts = (i + 1) * NumMemElts;
16396         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16397         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
16398             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16399             TLI.allowsMemoryAccess(Context, DL, Ty,
16400                                    *FirstInChain->getMemOperand(), &IsFast) &&
16401             IsFast)
16402           LastLegalVectorType = i + 1;
16403       }
16404     }
16405 
16406     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
16407     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
16408 
16409     // Check if we found a legal integer type that creates a meaningful
16410     // merge.
16411     if (NumElem < 2) {
16412       // We know that candidate stores are in order and of correct
16413       // shape. While there is no mergeable sequence from the
16414       // beginning, one may start later in the sequence. The only
16415       // reason a merge of size N could have failed where another of
16416       // the same size would not have, is if the alignment has
16417       // improved or we've dropped a non-zero value. Drop as many
16418       // candidates as we can here.
16419       unsigned NumSkip = 1;
16420       while ((NumSkip < NumConsecutiveStores) &&
16421              (NumSkip < FirstZeroAfterNonZero) &&
16422              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16423         NumSkip++;
16424 
16425       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16426       NumConsecutiveStores -= NumSkip;
16427       continue;
16428     }
16429 
16430     // Check that we can merge these candidates without causing a cycle.
16431     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16432                                                   RootNode)) {
16433       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16434       NumConsecutiveStores -= NumElem;
16435       continue;
16436     }
16437 
16438     MadeChange |= mergeStoresOfConstantsOrVecElts(
16439         StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
16440 
16441     // Remove merged stores for next iteration.
16442     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16443     NumConsecutiveStores -= NumElem;
16444   }
16445   return MadeChange;
16446 }
16447 
16448 bool DAGCombiner::tryStoreMergeOfExtracts(
16449     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
16450     EVT MemVT, SDNode *RootNode) {
16451   LLVMContext &Context = *DAG.getContext();
16452   const DataLayout &DL = DAG.getDataLayout();
16453   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16454   bool MadeChange = false;
16455 
16456   // Loop on Consecutive Stores on success.
16457   while (NumConsecutiveStores >= 2) {
16458     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16459     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16460     unsigned FirstStoreAlign = FirstInChain->getAlignment();
16461     unsigned NumStoresToMerge = 1;
16462     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16463       // Find a legal type for the vector store.
16464       unsigned Elts = (i + 1) * NumMemElts;
16465       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16466       bool IsFast = false;
16467 
16468       // Break early when size is too large to be legal.
16469       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
16470         break;
16471 
16472       if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16473           TLI.allowsMemoryAccess(Context, DL, Ty,
16474                                  *FirstInChain->getMemOperand(), &IsFast) &&
16475           IsFast)
16476         NumStoresToMerge = i + 1;
16477     }
16478 
16479     // Check if we found a legal integer type creating a meaningful
16480     // merge.
16481     if (NumStoresToMerge < 2) {
16482       // We know that candidate stores are in order and of correct
16483       // shape. While there is no mergeable sequence from the
16484       // beginning, one may start later in the sequence. The only
16485       // reason a merge of size N could have failed where another of
16486       // the same size would not have, is if the alignment has
16487       // improved. Drop as many candidates as we can here.
16488       unsigned NumSkip = 1;
16489       while ((NumSkip < NumConsecutiveStores) &&
16490              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16491         NumSkip++;
16492 
16493       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16494       NumConsecutiveStores -= NumSkip;
16495       continue;
16496     }
16497 
16498     // Check that we can merge these candidates without causing a cycle.
16499     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
16500                                                   RootNode)) {
16501       StoreNodes.erase(StoreNodes.begin(),
16502                        StoreNodes.begin() + NumStoresToMerge);
16503       NumConsecutiveStores -= NumStoresToMerge;
16504       continue;
16505     }
16506 
16507     MadeChange |= mergeStoresOfConstantsOrVecElts(
16508         StoreNodes, MemVT, NumStoresToMerge, false, true, false);
16509 
16510     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
16511     NumConsecutiveStores -= NumStoresToMerge;
16512   }
16513   return MadeChange;
16514 }
16515 
16516 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
16517                                        unsigned NumConsecutiveStores, EVT MemVT,
16518                                        SDNode *RootNode, bool AllowVectors,
16519                                        bool IsNonTemporalStore,
16520                                        bool IsNonTemporalLoad) {
16521   LLVMContext &Context = *DAG.getContext();
16522   const DataLayout &DL = DAG.getDataLayout();
16523   int64_t ElementSizeBytes = MemVT.getStoreSize();
16524   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16525   bool MadeChange = false;
16526 
16527   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16528 
16529   // Look for load nodes which are used by the stored values.
16530   SmallVector<MemOpLink, 8> LoadNodes;
16531 
16532   // Find acceptable loads. Loads need to have the same chain (token factor),
16533   // must not be zext, volatile, or indexed, and they must be consecutive.
16534   BaseIndexOffset LdBasePtr;
16535 
16536   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16537     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16538     SDValue Val = peekThroughBitcasts(St->getValue());
16539     LoadSDNode *Ld = cast<LoadSDNode>(Val);
16540 
16541     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
16542     // If this is not the first ptr that we check.
16543     int64_t LdOffset = 0;
16544     if (LdBasePtr.getBase().getNode()) {
16545       // The base ptr must be the same.
16546       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
16547         break;
16548     } else {
16549       // Check that all other base pointers are the same as this one.
16550       LdBasePtr = LdPtr;
16551     }
16552 
16553     // We found a potential memory operand to merge.
16554     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
16555   }
16556 
16557   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
16558     Align RequiredAlignment;
16559     bool NeedRotate = false;
16560     if (LoadNodes.size() == 2) {
16561       // If we have load/store pair instructions and we only have two values,
16562       // don't bother merging.
16563       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
16564           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
16565         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
16566         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
16567         break;
16568       }
16569       // If the loads are reversed, see if we can rotate the halves into place.
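      // For example, if the stores consume (load p+4) then (load p), a
      // single wide load from p provides both halves in swapped order and
      // a rotate by half the width puts them back in place.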
16570       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
16571       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
16572       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
16573       if (Offset0 - Offset1 == ElementSizeBytes &&
16574           (hasOperation(ISD::ROTL, PairVT) ||
16575            hasOperation(ISD::ROTR, PairVT))) {
16576         std::swap(LoadNodes[0], LoadNodes[1]);
16577         NeedRotate = true;
16578       }
16579     }
16580     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16581     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16582     unsigned FirstStoreAlign = FirstInChain->getAlignment();
16583     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
16584 
16585     // Scan the memory operations on the chain and find the first
16586     // non-consecutive load memory address. These variables hold the index in
16587     // the store node array.
16588 
16589     unsigned LastConsecutiveLoad = 1;
16590 
16591     // These variables refer to a size and not an index in the array.
16592     unsigned LastLegalVectorType = 1;
16593     unsigned LastLegalIntegerType = 1;
16594     bool isDereferenceable = true;
16595     bool DoIntegerTruncate = false;
16596     StartAddress = LoadNodes[0].OffsetFromBase;
16597     SDValue LoadChain = FirstLoad->getChain();
16598     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
16599       // All loads must share the same chain.
16600       if (LoadNodes[i].MemNode->getChain() != LoadChain)
16601         break;
16602 
16603       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
16604       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16605         break;
16606       LastConsecutiveLoad = i;
16607 
16608       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
16609         isDereferenceable = false;
16610 
16611       // Find a legal type for the vector store.
16612       unsigned Elts = (i + 1) * NumMemElts;
16613       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16614 
16615       // Break early when size is too large to be legal.
16616       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16617         break;
16618 
16619       bool IsFastSt = false;
16620       bool IsFastLd = false;
16621       if (TLI.isTypeLegal(StoreTy) &&
16622           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16623           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16624                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
16625           IsFastSt &&
16626           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16627                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
16628           IsFastLd) {
16629         LastLegalVectorType = i + 1;
16630       }
16631 
16632       // Find a legal type for the integer store.
16633       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16634       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16635       if (TLI.isTypeLegal(StoreTy) &&
16636           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16637           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16638                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
16639           IsFastSt &&
16640           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16641                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
16642           IsFastLd) {
16643         LastLegalIntegerType = i + 1;
16644         DoIntegerTruncate = false;
16645         // Or check whether a truncstore and extload is legal.
16646       } else if (TLI.getTypeAction(Context, StoreTy) ==
16647                  TargetLowering::TypePromoteInteger) {
16648         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
16649         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16650             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16651             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
16652             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
16653             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
16654             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16655                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
16656             IsFastSt &&
16657             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16658                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
16659             IsFastLd) {
16660           LastLegalIntegerType = i + 1;
16661           DoIntegerTruncate = true;
16662         }
16663       }
16664     }
16665 
16666     // Only use vector types if the vector type is larger than the integer
16667     // type. If they are the same, use integers.
16668     bool UseVectorTy =
16669         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
16670     unsigned LastLegalType =
16671         std::max(LastLegalVectorType, LastLegalIntegerType);
16672 
16673     // We add +1 here because the LastXXX variables refer to an index
16674     // (a location) while NumElem refers to a count (a size).
16675     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
16676     NumElem = std::min(LastLegalType, NumElem);
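          // Illustrative walk-through of the clamping above: with
          // NumConsecutiveStores = 4 and LastConsecutiveLoad = 2 (loads 0..2
          // consecutive), NumElem = min(4, 2 + 1) = 3, further clamped by
          // LastLegalType.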
16677     unsigned FirstLoadAlign = FirstLoad->getAlignment();
16678 
16679     if (NumElem < 2) {
16680       // We know that candidate stores are in order and of correct
16681       // shape. While there is no mergeable sequence from the
16682       // beginning, one may start later in the sequence. The only
16683       // reason a merge of size N could have failed where another of
16684       // the same size would not have is if the alignment of either
16685       // the load or the store has improved. Drop as many candidates
16686       // as we can here.
16687       unsigned NumSkip = 1;
16688       while ((NumSkip < LoadNodes.size()) &&
16689              (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
16690              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16691         NumSkip++;
16692       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16693       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
16694       NumConsecutiveStores -= NumSkip;
16695       continue;
16696     }
16697 
16698     // Check that we can merge these candidates without causing a cycle.
16699     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16700                                                   RootNode)) {
16701       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16702       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16703       NumConsecutiveStores -= NumElem;
16704       continue;
16705     }
16706 
16707     // Find if it is better to use vectors or integers to load and store
16708     // to memory.
16709     EVT JointMemOpVT;
16710     if (UseVectorTy) {
16711       // Find a legal type for the vector store.
16712       unsigned Elts = NumElem * NumMemElts;
16713       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16714     } else {
16715       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
16716       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
16717     }
16718 
16719     SDLoc LoadDL(LoadNodes[0].MemNode);
16720     SDLoc StoreDL(StoreNodes[0].MemNode);
16721 
16722     // The merged loads are required to have the same incoming chain, so
16723     // using the first's chain is acceptable.
16724 
16725     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16726     AddToWorklist(NewStoreChain.getNode());
16727 
16728     MachineMemOperand::Flags LdMMOFlags =
16729         isDereferenceable ? MachineMemOperand::MODereferenceable
16730                           : MachineMemOperand::MONone;
16731     if (IsNonTemporalLoad)
16732       LdMMOFlags |= MachineMemOperand::MONonTemporal;
16733 
16734     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
16735                                               ? MachineMemOperand::MONonTemporal
16736                                               : MachineMemOperand::MONone;
16737 
16738     SDValue NewLoad, NewStore;
16739     if (UseVectorTy || !DoIntegerTruncate) {
16740       NewLoad = DAG.getLoad(
16741           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
16742           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
16743       SDValue StoreOp = NewLoad;
16744       if (NeedRotate) {
16745         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
16746         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
16747                "Unexpected type for rotate-able load pair");
16748         SDValue RotAmt =
16749             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
16750         // Target can convert to the identical ROTR if it does not have ROTL.
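              // (Illustrative: rotating a 2*N-bit value by N bits swaps its
              // two halves, so rotl(x, N) == rotr(x, N) here; either opcode
              // stores the pair in the required order.)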
16751         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
16752       }
16753       NewStore = DAG.getStore(
16754           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
16755           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16756     } else { // This must be the truncstore/extload case
16757       EVT ExtendedTy =
16758           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16759       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16760                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
16761                                FirstLoad->getPointerInfo(), JointMemOpVT,
16762                                FirstLoadAlign, LdMMOFlags);
16763       NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16764                                    FirstInChain->getBasePtr(),
16765                                    FirstInChain->getPointerInfo(), JointMemOpVT,
16766                                    FirstInChain->getAlignment(),
16767                                    FirstInChain->getMemOperand()->getFlags());
16768     }
16769 
16770     // Transfer chain users from old loads to the new load.
16771     for (unsigned i = 0; i < NumElem; ++i) {
16772       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16773       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16774                                     SDValue(NewLoad.getNode(), 1));
16775     }
16776 
16777     // Replace all stores with the new store. Recursively remove corresponding
16778     // values if they are no longer used.
16779     for (unsigned i = 0; i < NumElem; ++i) {
16780       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16781       CombineTo(StoreNodes[i].MemNode, NewStore);
16782       if (Val.getNode()->use_empty())
16783         recursivelyDeleteUnusedNodes(Val.getNode());
16784     }
16785 
16786     MadeChange = true;
16787     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16788     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16789     NumConsecutiveStores -= NumElem;
16790   }
16791   return MadeChange;
16792 }
16793 
16794 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
16795   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
16796     return false;
16797 
16798   // TODO: Extend this function to merge stores of scalable vectors.
16799   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
16800   // store since we know <vscale x 16 x i8> is exactly twice as large as
16801   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
16802   EVT MemVT = St->getMemoryVT();
16803   if (MemVT.isScalableVector())
16804     return false;
16805   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
16806     return false;
16807 
16808   // This function cannot currently deal with non-byte-sized memory sizes.
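        // (Illustrative: an i1 store has a 1-byte store size but a 1-bit
        // value size, so the check below rejects it.)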
16809   int64_t ElementSizeBytes = MemVT.getStoreSize();
16810   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
16811     return false;
16812 
16813   // Do not bother looking at stored values that are not constants, loads, or
16814   // extracted vector elements.
16815   SDValue StoredVal = peekThroughBitcasts(St->getValue());
16816   const StoreSource StoreSrc = getStoreSource(StoredVal);
16817   if (StoreSrc == StoreSource::Unknown)
16818     return false;
16819 
16820   SmallVector<MemOpLink, 8> StoreNodes;
16821   SDNode *RootNode;
16822   // Find potential store merge candidates by searching through the chain sub-DAG.
16823   getStoreMergeCandidates(St, StoreNodes, RootNode);
16824 
16825   // Check if there is anything to merge.
16826   if (StoreNodes.size() < 2)
16827     return false;
16828 
16829   // Sort the memory operands according to their distance from the
16830   // base pointer.
16831   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
16832     return LHS.OffsetFromBase < RHS.OffsetFromBase;
16833   });
16834 
16835   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
16836       Attribute::NoImplicitFloat);
16837   bool IsNonTemporalStore = St->isNonTemporal();
16838   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
16839                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
16840 
16841   // Store merging attempts to merge the lowest-addressed stores first.
16842   // This generally works out well when it succeeds, as the remaining
16843   // stores are checked after the first collection of stores is merged.
16844   // However, in the case that a non-mergeable store is found first,
16845   // e.g., {p[-2], p[0], p[1], p[2], p[3]}, we would fail and miss the
16846   // subsequent mergeable cases. To prevent this, we prune such stores
16847   // from the front of StoreNodes here.
16848   bool MadeChange = false;
16849   while (StoreNodes.size() > 1) {
16850     unsigned NumConsecutiveStores =
16851         getConsecutiveStores(StoreNodes, ElementSizeBytes);
16852     // There are no more stores in the list to examine.
16853     if (NumConsecutiveStores == 0)
16854       return MadeChange;
16855 
16856     // We have at least 2 consecutive stores. Try to merge them.
16857     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
16858     switch (StoreSrc) {
16859     case StoreSource::Constant:
16860       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
16861                                              MemVT, RootNode, AllowVectors);
16862       break;
16863 
16864     case StoreSource::Extract:
16865       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
16866                                             MemVT, RootNode);
16867       break;
16868 
16869     case StoreSource::Load:
16870       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
16871                                          MemVT, RootNode, AllowVectors,
16872                                          IsNonTemporalStore, IsNonTemporalLoad);
16873       break;
16874 
16875     default:
16876       llvm_unreachable("Unhandled store source type");
16877     }
16878   }
16879   return MadeChange;
16880 }
16881 
16882 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16883   SDLoc SL(ST);
16884   SDValue ReplStore;
16885 
16886   // Replace the chain to avoid dependency.
16887   if (ST->isTruncatingStore()) {
16888     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16889                                   ST->getBasePtr(), ST->getMemoryVT(),
16890                                   ST->getMemOperand());
16891   } else {
16892     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16893                              ST->getMemOperand());
16894   }
16895 
16896   // Create token to keep both nodes around.
16897   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16898                               MVT::Other, ST->getChain(), ReplStore);
16899 
16900   // Make sure the new and old chains are cleaned up.
16901   AddToWorklist(Token.getNode());
16902 
16903   // Don't add users to work list.
16904   return CombineTo(ST, Token, false);
16905 }
16906 
16907 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16908   SDValue Value = ST->getValue();
16909   if (Value.getOpcode() == ISD::TargetConstantFP)
16910     return SDValue();
16911 
16912   if (!ISD::isNormalStore(ST))
16913     return SDValue();
16914 
16915   SDLoc DL(ST);
16916 
16917   SDValue Chain = ST->getChain();
16918   SDValue Ptr = ST->getBasePtr();
16919 
16920   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16921 
16922   // NOTE: If the original store is volatile, this transform must not increase
16923   // the number of stores.  For example, on x86-32 an f64 can be stored in one
16924   // processor operation but an i64 (which is not legal) requires two.  So the
16925   // transform should not be done in this case.
16926 
16927   SDValue Tmp;
16928   switch (CFP->getSimpleValueType(0).SimpleTy) {
16929   default:
16930     llvm_unreachable("Unknown FP type");
16931   case MVT::f16:    // We don't do this for these yet.
16932   case MVT::f80:
16933   case MVT::f128:
16934   case MVT::ppcf128:
16935     return SDValue();
16936   case MVT::f32:
16937     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
16938         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16940       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16941                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16942                             MVT::i32);
16943       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16944     }
16945 
16946     return SDValue();
16947   case MVT::f64:
16948     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16949          ST->isSimple()) ||
16950         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16952       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16953                             getZExtValue(), SDLoc(CFP), MVT::i64);
16954       return DAG.getStore(Chain, DL, Tmp,
16955                           Ptr, ST->getMemOperand());
16956     }
16957 
16958     if (ST->isSimple() &&
16959         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16960       // Many FP stores are not made apparent until after legalize, e.g. for
16961       // argument passing.  Since this is so common, custom legalize the
16962       // 64-bit integer store into two 32-bit stores.
16963       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16964       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16965       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
16966       if (DAG.getDataLayout().isBigEndian())
16967         std::swap(Lo, Hi);
16968 
16969       unsigned Alignment = ST->getAlignment();
16970       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16971       AAMDNodes AAInfo = ST->getAAInfo();
16972 
16973       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16974                                  ST->getAlignment(), MMOFlags, AAInfo);
16975       Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
16976       Alignment = MinAlign(Alignment, 4U);
16977       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16978                                  ST->getPointerInfo().getWithOffset(4),
16979                                  Alignment, MMOFlags, AAInfo);
16980       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16981                          St0, St1);
16982     }
16983 
16984     return SDValue();
16985   }
16986 }
16987 
16988 SDValue DAGCombiner::visitSTORE(SDNode *N) {
16989   StoreSDNode *ST  = cast<StoreSDNode>(N);
16990   SDValue Chain = ST->getChain();
16991   SDValue Value = ST->getValue();
16992   SDValue Ptr   = ST->getBasePtr();
16993 
16994   // If this is a store of a bit convert, store the input value if the
16995   // resultant store does not need a higher alignment than the original.
16996   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
16997       ST->isUnindexed()) {
16998     EVT SVT = Value.getOperand(0).getValueType();
16999     // If the store is volatile, we only want to change the store type if the
17000     // resulting store is legal. Otherwise we might increase the number of
17001     // memory accesses. We don't care if the original type was legal or not
17002     // as we assume software couldn't rely on the number of accesses of an
17003     // illegal type.
17004     // TODO: May be able to relax for unordered atomics (see D66309)
17005     if (((!LegalOperations && ST->isSimple()) ||
17006          TLI.isOperationLegal(ISD::STORE, SVT)) &&
17007         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17008                                      DAG, *ST->getMemOperand())) {
17009       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17010                           ST->getMemOperand());
17011     }
17012   }
17013 
17014   // Turn 'store undef, Ptr' -> nothing.
17015   if (Value.isUndef() && ST->isUnindexed())
17016     return Chain;
17017 
17018   // Try to infer better alignment information than the store already has.
17019   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17020     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17021       if (*Alignment > ST->getAlign() &&
17022           isAligned(*Alignment, ST->getSrcValueOffset())) {
17023         SDValue NewStore =
17024             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17025                               ST->getMemoryVT(), *Alignment,
17026                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
17027         // NewStore will always be N as we are only refining the alignment
17028         assert(NewStore.getNode() == N);
17029         (void)NewStore;
17030       }
17031     }
17032   }
17033 
17034   // Try transforming a pair floating point load / store ops to integer
17035   // load / store ops.
17036   if (SDValue NewST = TransformFPLoadStorePair(N))
17037     return NewST;
17038 
17039   // Try transforming several stores into STORE (BSWAP).
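        // (Illustrative: on a little-endian target, storing the bytes of an
        // i32 value X to p[3], p[2], p[1], p[0] can become one i32 store of
        // (bswap X).)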
17040   if (SDValue Store = MatchStoreCombine(ST))
17041     return Store;
17042 
17043   if (ST->isUnindexed()) {
17044     // Walk up chain skipping non-aliasing memory nodes, on this store and any
17045     // adjacent stores.
17046     if (findBetterNeighborChains(ST)) {
17047       // replaceStoreChain uses CombineTo, which handles all of the worklist
17048       // manipulation. Return the original node so that nothing else is done.
17049       return SDValue(ST, 0);
17050     }
17051     Chain = ST->getChain();
17052   }
17053 
17054   // FIXME: is there such a thing as a truncating indexed store?
17055   if (ST->isTruncatingStore() && ST->isUnindexed() &&
17056       Value.getValueType().isInteger() &&
17057       (!isa<ConstantSDNode>(Value) ||
17058        !cast<ConstantSDNode>(Value)->isOpaque())) {
17059     APInt TruncDemandedBits =
17060         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
17061                              ST->getMemoryVT().getScalarSizeInBits());
17062 
17063     // See if we can simplify the input to this truncstore with knowledge that
17064     // only the low bits are being used.  For example:
17065     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
17066     AddToWorklist(Value.getNode());
17067     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
17068       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
17069                                ST->getMemOperand());
17070 
17071     // Otherwise, see if we can simplify the operation with
17072     // SimplifyDemandedBits, which only works if the value has a single use.
17073     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
17074       // Re-visit the store if anything changed and the store hasn't been
17075       // merged with another node (in which case N is deleted).
17076       // SimplifyDemandedBits will add Value's node back to the worklist if
17077       // necessary, but we also need to re-visit the Store node itself.
17078       if (N->getOpcode() != ISD::DELETED_NODE)
17079         AddToWorklist(N);
17080       return SDValue(N, 0);
17081     }
17082   }
17083 
17084   // If this is a load followed by a store to the same location, then the store
17085   // is dead/noop.
17086   // TODO: Can relax for unordered atomics (see D66309)
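        // (Illustrative C equivalent: 'int x = *p; *p = x;' -- the store
        // writes back the value just loaded, so it can be dropped.)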
17087   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
17088     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
17089         ST->isUnindexed() && ST->isSimple() &&
17090         // There can't be any side effects between the load and store, such as
17091         // a call or store.
17092         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
17093       // The store is dead, remove it.
17094       return Chain;
17095     }
17096   }
17097 
17098   // TODO: Can relax for unordered atomics (see D66309)
17099   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
17100     if (ST->isUnindexed() && ST->isSimple() &&
17101         ST1->isUnindexed() && ST1->isSimple()) {
17102       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
17103           ST->getMemoryVT() == ST1->getMemoryVT()) {
17104         // If this is a store followed by a store with the same value to the
17105         // same location, then the store is dead/noop.
17106         return Chain;
17107       }
17108 
17109       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
17110           !ST1->getBasePtr().isUndef() &&
17111           // BaseIndexOffset and the code below requires knowing the size
17112           // of a vector, so bail out if MemoryVT is scalable.
17113           !ST1->getMemoryVT().isScalableVector()) {
17114         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
17115         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
17116         unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
17117         unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
17118         // If the preceding store writes to a subset of the current store's
17119         // location and no other node is chained to that store, we can
17120         // effectively drop that store. Do not remove stores to undef as they
17121         // may be used as data sinks.
17122         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
17123           CombineTo(ST1, ST1->getChain());
17124           return SDValue();
17125         }
17126       }
17127     }
17128   }
17129 
17130   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
17131   // truncating store.  We can do this even if this is already a truncstore.
17132   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
17133       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
17134       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
17135                             ST->getMemoryVT())) {
17136     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
17137                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
17138   }
17139 
17140   // Always perform this optimization before types are legal. If the target
17141   // prefers, also try this after legalization to catch stores that were created
17142   // by intrinsics or other nodes.
17143   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
17144     while (true) {
17145       // There can be multiple store sequences on the same chain.
17146       // Keep trying to merge store sequences until we are unable to do so
17147       // or until we merge the last store on the chain.
17148       bool Changed = mergeConsecutiveStores(ST);
17149       if (!Changed) break;
17150       // Return N as merging only uses CombineTo, so no worklist cleanup
17151       // is necessary.
17152       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
17153         return SDValue(N, 0);
17154     }
17155   }
17156 
17157   // Try transforming N to an indexed store.
17158   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
17159     return SDValue(N, 0);
17160 
17161   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
17162   //
17163   // Make sure to do this only after attempting to merge stores in order to
17164   //  avoid changing the types of some subset of stores due to visit order,
17165   //  preventing their merging.
17166   if (isa<ConstantFPSDNode>(ST->getValue())) {
17167     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
17168       return NewSt;
17169   }
17170 
17171   if (SDValue NewSt = splitMergedValStore(ST))
17172     return NewSt;
17173 
17174   return ReduceLoadOpStoreWidth(N);
17175 }
17176 
17177 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
17178   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
17179   if (!LifetimeEnd->hasOffset())
17180     return SDValue();
17181 
17182   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
17183                                         LifetimeEnd->getOffset(), false);
17184 
17185   // We walk up the chains to find stores.
17186   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
17187   while (!Chains.empty()) {
17188     SDValue Chain = Chains.back();
17189     Chains.pop_back();
17190     if (!Chain.hasOneUse())
17191       continue;
17192     switch (Chain.getOpcode()) {
17193     case ISD::TokenFactor:
17194       for (unsigned Nops = Chain.getNumOperands(); Nops;)
17195         Chains.push_back(Chain.getOperand(--Nops));
17196       break;
17197     case ISD::LIFETIME_START:
17198     case ISD::LIFETIME_END:
17199       // We can forward past any lifetime start/end that can be proven not to
17200       // alias the node.
17201       if (!isAlias(Chain.getNode(), N))
17202         Chains.push_back(Chain.getOperand(0));
17203       break;
17204     case ISD::STORE: {
17205       StoreSDNode *ST = cast<StoreSDNode>(Chain);
17206       // TODO: Can relax for unordered atomics (see D66309)
17207       if (!ST->isSimple() || ST->isIndexed())
17208         continue;
17209       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
17210       // If we store purely within object bounds just before its lifetime ends,
17211       // we can remove the store.
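            // (Illustrative: a store to a local immediately followed by that
            // local's lifetime end can never be observed, so it is dead.)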
17212       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
17213                                    ST->getMemoryVT().getStoreSizeInBits())) {
17214         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
17215                    dbgs() << "\nwithin LIFETIME_END of : ";
17216                    LifetimeEndBase.dump(); dbgs() << "\n");
17217         CombineTo(ST, ST->getChain());
17218         return SDValue(N, 0);
17219       }
17220     }
17221     }
17222   }
17223   return SDValue();
17224 }
17225 
17226 /// For the instruction sequence of store below, F and I values
17227 /// are bundled together as an i64 value before being stored into memory.
17228 /// Sometimes it is more efficient to generate separate stores for F and I,
17229 /// which can remove the bitwise instructions or sink them to colder places.
17230 ///
17231 ///   (store (or (zext (bitcast F to i32) to i64),
17232 ///              (shl (zext I to i64), 32)), addr)  -->
17233 ///   (store F, addr) and (store I, addr+4)
17234 ///
17235 /// Similarly, splitting for other merged store can also be beneficial, like:
17236 /// For pair of {i32, i32}, i64 store --> two i32 stores.
17237 /// For pair of {i32, i16}, i64 store --> two i32 stores.
17238 /// For pair of {i16, i16}, i32 store --> two i16 stores.
17239 /// For pair of {i16, i8},  i32 store --> two i16 stores.
17240 /// For pair of {i8, i8},   i16 store --> two i8 stores.
17241 ///
17242 /// We allow each target to determine specifically which kind of splitting is
17243 /// supported.
17244 ///
17245 /// The store patterns are commonly seen in the simple code snippet below
17246 /// when only std::make_pair(...) is SROA-transformed before being inlined into hoo.
17247 ///   void goo(const std::pair<int, float> &);
17248 ///   hoo() {
17249 ///     ...
17250 ///     goo(std::make_pair(tmp, ftmp));
17251 ///     ...
17252 ///   }
17253 ///
17254 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
17255   if (OptLevel == CodeGenOpt::None)
17256     return SDValue();
17257 
17258   // Can't change the number of memory accesses for a volatile store or break
17259   // atomicity for an atomic one.
17260   if (!ST->isSimple())
17261     return SDValue();
17262 
17263   SDValue Val = ST->getValue();
17264   SDLoc DL(ST);
17265 
17266   // Match OR operand.
17267   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
17268     return SDValue();
17269 
17270   // Match SHL operand and get Lower and Higher parts of Val.
17271   SDValue Op1 = Val.getOperand(0);
17272   SDValue Op2 = Val.getOperand(1);
17273   SDValue Lo, Hi;
17274   if (Op1.getOpcode() != ISD::SHL) {
17275     std::swap(Op1, Op2);
17276     if (Op1.getOpcode() != ISD::SHL)
17277       return SDValue();
17278   }
17279   Lo = Op2;
17280   Hi = Op1.getOperand(0);
17281   if (!Op1.hasOneUse())
17282     return SDValue();
17283 
17284   // Match shift amount to HalfValBitSize.
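        // (Illustrative: for an i64 Val, HalfValBitSize is 32, so we expect
        // Val to be (or Lo, (shl Hi, 32)) in either operand order.)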
17285   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
17286   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
17287   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
17288     return SDValue();
17289 
17290   // Lo and Hi must be zero-extended from integer types no wider than
17291   // HalfValBitSize.
17292   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
17293       !Lo.getOperand(0).getValueType().isScalarInteger() ||
17294       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
17295       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
17296       !Hi.getOperand(0).getValueType().isScalarInteger() ||
17297       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
17298     return SDValue();
17299 
17300   // Use the EVT of low and high parts before bitcast as the input
17301   // of target query.
17302   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
17303                   ? Lo.getOperand(0).getValueType()
17304                   : Lo.getValueType();
17305   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
17306                    ? Hi.getOperand(0).getValueType()
17307                    : Hi.getValueType();
17308   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
17309     return SDValue();
17310 
17311   // Start to split store.
17312   unsigned Alignment = ST->getAlignment();
17313   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17314   AAMDNodes AAInfo = ST->getAAInfo();
17315 
17316   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
17317   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
17318   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
17319   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
17320 
17321   SDValue Chain = ST->getChain();
17322   SDValue Ptr = ST->getBasePtr();
17323   // Lower value store.
17324   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17325                              ST->getAlignment(), MMOFlags, AAInfo);
17326   Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
17327   // Higher value store.
17328   SDValue St1 =
17329       DAG.getStore(St0, DL, Hi, Ptr,
17330                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
17331                    Alignment / 2, MMOFlags, AAInfo);
17332   return St1;
17333 }
17334 
17335 /// Convert a disguised subvector insertion into a shuffle:
17336 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
17337   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
17338          "Expected extract_vector_elt");
17339   SDValue InsertVal = N->getOperand(1);
17340   SDValue Vec = N->getOperand(0);
17341 
17342   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
17343   // InsIndex)
17344   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
17345   //   CONCAT_VECTORS.
17346   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
17347       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17348       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
17349     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
17350     ArrayRef<int> Mask = SVN->getMask();
17351 
17352     SDValue X = Vec.getOperand(0);
17353     SDValue Y = Vec.getOperand(1);
17354 
17355     // Vec's operand 0 is using indices from 0 to N-1 and
17356     // operand 1 from N to 2N - 1, where N is the number of
17357     // elements in the vectors.
17358     SDValue InsertVal0 = InsertVal.getOperand(0);
17359     int ElementOffset = -1;
17360 
17361     // We explore the inputs of the shuffle in order to see if we find the
17362     // source of the extract_vector_elt. If so, we can use it to modify the
17363     // shuffle rather than perform an insert_vector_elt.
17364     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
17365     ArgWorkList.emplace_back(Mask.size(), Y);
17366     ArgWorkList.emplace_back(0, X);
17367 
17368     while (!ArgWorkList.empty()) {
17369       int ArgOffset;
17370       SDValue ArgVal;
17371       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
17372 
17373       if (ArgVal == InsertVal0) {
17374         ElementOffset = ArgOffset;
17375         break;
17376       }
17377 
17378       // Peek through concat_vector.
17379       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
17380         int CurrentArgOffset =
17381             ArgOffset + ArgVal.getValueType().getVectorNumElements();
17382         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
17383         for (SDValue Op : reverse(ArgVal->ops())) {
17384           CurrentArgOffset -= Step;
17385           ArgWorkList.emplace_back(CurrentArgOffset, Op);
17386         }
17387 
17388         // Make sure we went through all the elements and did not screw up index
17389         // computation.
17390         assert(CurrentArgOffset == ArgOffset);
17391       }
17392     }
17393 
17394     if (ElementOffset != -1) {
17395       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
17396 
17397       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
17398       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
17399       assert(NewMask[InsIndex] <
17400                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
17401              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
17402 
17403       SDValue LegalShuffle =
17404               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
17405                                           Y, NewMask, DAG);
17406       if (LegalShuffle)
17407         return LegalShuffle;
17408     }
17409   }
17410 
17411   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
17412   // bitcast(shuffle (bitcast V), (extended X), Mask)
17413   // Note: We do not use an insert_subvector node because that requires a
17414   // legal subvector type.
17415   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
17416       !InsertVal.getOperand(0).getValueType().isVector())
17417     return SDValue();
17418 
17419   SDValue SubVec = InsertVal.getOperand(0);
17420   SDValue DestVec = N->getOperand(0);
17421   EVT SubVecVT = SubVec.getValueType();
17422   EVT VT = DestVec.getValueType();
17423   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
17424   // If the source only has a single vector element, the cost of creating and
17425   // shuffling in a wide vector is likely to exceed that of an insert_vector_elt.
17426   if (NumSrcElts == 1)
17427     return SDValue();
17428   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
17429   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
17430 
17431   // Step 1: Create a shuffle mask that implements this insert operation. The
17432   // vector that we are inserting into will be operand 0 of the shuffle, so
17433   // those elements are just 'i'. The inserted subvector is in the first
17434   // positions of operand 1 of the shuffle. Example:
17435   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
17436   SmallVector<int, 16> Mask(NumMaskVals);
17437   for (unsigned i = 0; i != NumMaskVals; ++i) {
17438     if (i / NumSrcElts == InsIndex)
17439       Mask[i] = (i % NumSrcElts) + NumMaskVals;
17440     else
17441       Mask[i] = i;
17442   }
17443 
17444   // Bail out if the target can not handle the shuffle we want to create.
17445   EVT SubVecEltVT = SubVecVT.getVectorElementType();
17446   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
17447   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
17448     return SDValue();
17449 
17450   // Step 2: Create a wide vector from the inserted source vector by appending
17451   // undefined elements. This is the same size as our destination vector.
17452   SDLoc DL(N);
17453   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
17454   ConcatOps[0] = SubVec;
17455   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
17456 
17457   // Step 3: Shuffle in the padded subvector.
17458   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
17459   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
17460   AddToWorklist(PaddedSubV.getNode());
17461   AddToWorklist(DestVecBC.getNode());
17462   AddToWorklist(Shuf.getNode());
17463   return DAG.getBitcast(VT, Shuf);
17464 }
17465 
17466 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
17467   SDValue InVec = N->getOperand(0);
17468   SDValue InVal = N->getOperand(1);
17469   SDValue EltNo = N->getOperand(2);
17470   SDLoc DL(N);
17471 
17472   EVT VT = InVec.getValueType();
17473   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17474 
17475   // Insert into out-of-bounds element is undefined.
17476   if (IndexC && VT.isFixedLengthVector() &&
17477       IndexC->getZExtValue() >= VT.getVectorNumElements())
17478     return DAG.getUNDEF(VT);
17479 
17480   // Remove redundant insertions:
17481   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
17482   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17483       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
17484     return InVec;
17485 
17486   if (!IndexC) {
17487     // If this is a variable insert into an undef vector, it might be better to splat:
17488     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
17489     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
17490       if (VT.isScalableVector())
17491         return DAG.getSplatVector(VT, DL, InVal);
17492       else {
17493         SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
17494         return DAG.getBuildVector(VT, DL, Ops);
17495       }
17496     }
17497     return SDValue();
17498   }
17499 
17500   if (VT.isScalableVector())
17501     return SDValue();
17502 
17503   unsigned NumElts = VT.getVectorNumElements();
17504 
17505   // We must know which element is being inserted for folds below here.
17506   unsigned Elt = IndexC->getZExtValue();
17507   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
17508     return Shuf;
17509 
17510   // Canonicalize insert_vector_elt dag nodes.
17511   // Example:
17512   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
17513   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
17514   //
17515   // Do this only if the child insert_vector node has one use; also
17516   // do this only if indices are both constants and Idx1 < Idx0.
17517   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
17518       && isa<ConstantSDNode>(InVec.getOperand(2))) {
17519     unsigned OtherElt = InVec.getConstantOperandVal(2);
17520     if (Elt < OtherElt) {
17521       // Swap nodes.
17522       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
17523                                   InVec.getOperand(0), InVal, EltNo);
17524       AddToWorklist(NewOp.getNode());
17525       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
17526                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
17527     }
17528   }
17529 
17530   // If we can't generate a legal BUILD_VECTOR, exit
17531   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
17532     return SDValue();
17533 
17534   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
17535   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
17536   // vector elements.
17537   SmallVector<SDValue, 8> Ops;
17538   // Do not combine these two vectors if the output vector will not replace
17539   // the input vector.
17540   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
17541     Ops.append(InVec.getNode()->op_begin(),
17542                InVec.getNode()->op_end());
17543   } else if (InVec.isUndef()) {
17544     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
17545   } else {
17546     return SDValue();
17547   }
17548   assert(Ops.size() == NumElts && "Unexpected vector size");
17549 
17550   // Insert the element
17551   if (Elt < Ops.size()) {
17552     // All the operands of BUILD_VECTOR must have the same type;
17553     // we enforce that here.
17554     EVT OpVT = Ops[0].getValueType();
17555     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
17556   }
17557 
17558   // Return the new vector
17559   return DAG.getBuildVector(VT, DL, Ops);
17560 }
17561 
17562 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
17563                                                   SDValue EltNo,
17564                                                   LoadSDNode *OriginalLoad) {
17565   assert(OriginalLoad->isSimple());
17566 
17567   EVT ResultVT = EVE->getValueType(0);
17568   EVT VecEltVT = InVecVT.getVectorElementType();
17569   Align Alignment = OriginalLoad->getAlign();
17570   Align NewAlign = DAG.getDataLayout().getABITypeAlign(
17571       VecEltVT.getTypeForEVT(*DAG.getContext()));
17572 
17573   if (NewAlign > Alignment ||
17574       !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
17575     return SDValue();
17576 
17577   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
17578     ISD::NON_EXTLOAD : ISD::EXTLOAD;
17579   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
17580     return SDValue();
17581 
17582   Alignment = NewAlign;
17583 
17584   SDValue NewPtr = OriginalLoad->getBasePtr();
17585   SDValue Offset;
17586   EVT PtrType = NewPtr.getValueType();
17587   MachinePointerInfo MPI;
17588   SDLoc DL(EVE);
17589   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
17590     int Elt = ConstEltNo->getZExtValue();
17591     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
17592     Offset = DAG.getConstant(PtrOff, DL, PtrType);
17593     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
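          // (Illustrative: extracting element 3 of a loaded v4f32 gives
          // PtrOff = 32 * 3 / 8 = 12, i.e. a scalar load at $addr + 12.)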
17594   } else {
17595     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
17596     Offset = DAG.getNode(
17597         ISD::MUL, DL, PtrType, Offset,
17598         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
17599     // Discard the pointer info except the address space because the memory
17600     // operand can't represent this new access since the offset is variable.
17601     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
17602   }
17603   NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
17604 
17605   // The replacement we need to do here is a little tricky: we need to
17606   // replace an extractelement of a load with a load.
17607   // Use ReplaceAllUsesOfValuesWith to do the replacement.
17608   // Note that this replacement assumes that the extractelement is the only
17609   // use of the load; that's okay because we don't want to perform this
17610   // transformation in other cases anyway.
17611   SDValue Load;
17612   SDValue Chain;
17613   if (ResultVT.bitsGT(VecEltVT)) {
17614     // If the result type of vextract is wider than the load, then issue an
17615     // extending load instead.
17616     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
17617                                                   VecEltVT)
17618                                    ? ISD::ZEXTLOAD
17619                                    : ISD::EXTLOAD;
17620     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
17621                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
17622                           Alignment, OriginalLoad->getMemOperand()->getFlags(),
17623                           OriginalLoad->getAAInfo());
17624     Chain = Load.getValue(1);
17625   } else {
17626     Load = DAG.getLoad(
17627         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
17628         OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
17629     Chain = Load.getValue(1);
17630     if (ResultVT.bitsLT(VecEltVT))
17631       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
17632     else
17633       Load = DAG.getBitcast(ResultVT, Load);
17634   }
17635   WorklistRemover DeadNodes(*this);
17636   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
17637   SDValue To[] = { Load, Chain };
17638   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
17639   // Make sure to revisit this node to clean it up; it will usually be dead.
17640   AddToWorklist(EVE);
17641   // Since we're explicitly calling ReplaceAllUses, add the new node to the
17642   // worklist explicitly as well.
17643   AddToWorklistWithUsers(Load.getNode());
17644   ++OpsNarrowed;
17645   return SDValue(EVE, 0);
17646 }
17647 
17648 /// Transform a vector binary operation into a scalar binary operation by moving
17649 /// the math/logic after an extract element of a vector.
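      /// (Illustrative, assuming the target opts in via shouldScalarizeBinop:
      ///    extractelt (add X, <1,2,3,4>), 2 --> add (extractelt X, 2), 3
      ///  because extracting from the constant operand constant-folds.)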
17650 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
17651                                        bool LegalOperations) {
17652   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17653   SDValue Vec = ExtElt->getOperand(0);
17654   SDValue Index = ExtElt->getOperand(1);
17655   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17656   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
17657       Vec.getNode()->getNumValues() != 1)
17658     return SDValue();
17659 
17660   // Targets may want to avoid this to prevent an expensive register transfer.
17661   if (!TLI.shouldScalarizeBinop(Vec))
17662     return SDValue();
17663 
17664   // Extracting an element of a vector constant is constant-folded, so this
17665   // transform is just replacing a vector op with a scalar op while moving the
17666   // extract.
17667   SDValue Op0 = Vec.getOperand(0);
17668   SDValue Op1 = Vec.getOperand(1);
17669   if (isAnyConstantBuildVector(Op0, true) ||
17670       isAnyConstantBuildVector(Op1, true)) {
17671     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
17672     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
17673     SDLoc DL(ExtElt);
17674     EVT VT = ExtElt->getValueType(0);
17675     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
17676     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
17677     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
17678   }
17679 
17680   return SDValue();
17681 }
17682 
17683 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
17684   SDValue VecOp = N->getOperand(0);
17685   SDValue Index = N->getOperand(1);
17686   EVT ScalarVT = N->getValueType(0);
17687   EVT VecVT = VecOp.getValueType();
17688   if (VecOp.isUndef())
17689     return DAG.getUNDEF(ScalarVT);
17690 
17691   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
17692   //
17693   // This only really matters if the index is non-constant since other combines
17694   // on the constant elements already work.
17695   SDLoc DL(N);
17696   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
17697       Index == VecOp.getOperand(2)) {
17698     SDValue Elt = VecOp.getOperand(1);
17699     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
17700   }
17701 
17702   // (vextract (scalar_to_vector val), 0) -> val
17703   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17704     // Only the 0th element of SCALAR_TO_VECTOR is defined.
17705     if (DAG.isKnownNeverZero(Index))
17706       return DAG.getUNDEF(ScalarVT);
17707 
17708     // Check if the result type doesn't match the inserted element type. A
17709     // SCALAR_TO_VECTOR may truncate the inserted element and the
17710     // EXTRACT_VECTOR_ELT may widen the extracted vector.
17711     SDValue InOp = VecOp.getOperand(0);
17712     if (InOp.getValueType() != ScalarVT) {
17713       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17714       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17715     }
17716     return InOp;
17717   }
17718 
17719   // extract_vector_elt of out-of-bounds element -> UNDEF
17720   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17721   if (IndexC && VecVT.isFixedLengthVector() &&
17722       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
17723     return DAG.getUNDEF(ScalarVT);
17724 
17725   // extract_vector_elt (build_vector x, y), 1 -> y
17726   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
17727        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
17728       TLI.isTypeLegal(VecVT) &&
17729       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
17730     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
17731             VecVT.isFixedLengthVector()) &&
17732            "BUILD_VECTOR used for scalable vectors");
17733     unsigned IndexVal =
17734         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
17735     SDValue Elt = VecOp.getOperand(IndexVal);
17736     EVT InEltVT = Elt.getValueType();
17737 
17738     // Sometimes a build_vector's scalar input types do not match the result type.
17739     if (ScalarVT == InEltVT)
17740       return Elt;
17741 
17742     // TODO: It may be useful to truncate if free if the build_vector implicitly
17743     // converts.
17744   }
17745 
17746   if (VecVT.isScalableVector())
17747     return SDValue();
17748 
17749   // All the code from this point onwards assumes fixed width vectors, but it's
17750   // possible that some of the combinations could be made to work for scalable
17751   // vectors too.
17752   unsigned NumElts = VecVT.getVectorNumElements();
17753   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
17754 
17755   // TODO: These transforms should not require the 'hasOneUse' restriction, but
17756   // there are regressions on multiple targets without it. We can end up with a
17757   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
17758   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
17759       VecOp.hasOneUse()) {
17760     // The vector index of the LSBs of the source depends on the endianness.
17761     bool IsLE = DAG.getDataLayout().isLittleEndian();
17762     unsigned ExtractIndex = IndexC->getZExtValue();
17763     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
17764     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
17765     SDValue BCSrc = VecOp.getOperand(0);
17766     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
17767       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
17768 
17769     if (LegalTypes && BCSrc.getValueType().isInteger() &&
17770         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17771       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
17772       // trunc i64 X to i32
17773       SDValue X = BCSrc.getOperand(0);
17774       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
17775              "Extract element and scalar to vector can't change element type "
17776              "from FP to integer.");
17777       unsigned XBitWidth = X.getValueSizeInBits();
17778       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
17779 
17780       // An extract element return value type can be wider than its vector
17781       // operand element type. In that case, the high bits are undefined, so
17782       // it's possible that we may need to extend rather than truncate.
17783       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
17784         assert(XBitWidth % VecEltBitWidth == 0 &&
17785                "Scalar bitwidth must be a multiple of vector element bitwidth");
17786         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
17787       }
17788     }
17789   }
17790 
17791   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
17792     return BO;
17793 
17794   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
17795   // We only perform this optimization before the op legalization phase because
17796   // we may introduce new vector instructions which are not backed by TD
17797   // patterns. For example, on AVX this could mean extracting elements from
17798   // a wide vector without using extract_subvector. However, if we can find
17799   // an underlying scalar value, then we can always use that.
17800   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
17801     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
17802     // Find the new index to extract from.
17803     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
17804 
17805     // Extracting an undef index is undef.
17806     if (OrigElt == -1)
17807       return DAG.getUNDEF(ScalarVT);
17808 
17809     // Select the right vector half to extract from.
17810     SDValue SVInVec;
17811     if (OrigElt < (int)NumElts) {
17812       SVInVec = VecOp.getOperand(0);
17813     } else {
17814       SVInVec = VecOp.getOperand(1);
17815       OrigElt -= NumElts;
17816     }
17817 
17818     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
17819       SDValue InOp = SVInVec.getOperand(OrigElt);
17820       if (InOp.getValueType() != ScalarVT) {
17821         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17822         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17823       }
17824 
17825       return InOp;
17826     }
17827 
17828     // FIXME: We should handle recursing on other vector shuffles and
17829     // scalar_to_vector here as well.
17830 
17831     if (!LegalOperations ||
17832         // FIXME: Should really be just isOperationLegalOrCustom.
17833         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
17834         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
17835       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
17836                          DAG.getVectorIdxConstant(OrigElt, DL));
17837     }
17838   }
17839 
17840   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
17841   // simplify it based on the (valid) extraction indices.
17842   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
17843         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17844                Use->getOperand(0) == VecOp &&
17845                isa<ConstantSDNode>(Use->getOperand(1));
17846       })) {
17847     APInt DemandedElts = APInt::getNullValue(NumElts);
17848     for (SDNode *Use : VecOp->uses()) {
17849       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
17850       if (CstElt->getAPIntValue().ult(NumElts))
17851         DemandedElts.setBit(CstElt->getZExtValue());
17852     }
17853     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
17854       // We simplified the vector operand of this extract element. If this
17855       // extract is not dead, visit it again so it is folded properly.
17856       if (N->getOpcode() != ISD::DELETED_NODE)
17857         AddToWorklist(N);
17858       return SDValue(N, 0);
17859     }
17860     APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
17861     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
17862       // We simplified the vector operand of this extract element. If this
17863       // extract is not dead, visit it again so it is folded properly.
17864       if (N->getOpcode() != ISD::DELETED_NODE)
17865         AddToWorklist(N);
17866       return SDValue(N, 0);
17867     }
17868   }
17869 
17870   // Everything under here is trying to match an extract of a loaded value.
17871   // If the result of the load has to be truncated, then it's not necessarily
17872   // profitable.
17873   bool BCNumEltsChanged = false;
17874   EVT ExtVT = VecVT.getVectorElementType();
17875   EVT LVT = ExtVT;
17876   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
17877     return SDValue();
17878 
17879   if (VecOp.getOpcode() == ISD::BITCAST) {
17880     // Don't duplicate a load with other uses.
17881     if (!VecOp.hasOneUse())
17882       return SDValue();
17883 
17884     EVT BCVT = VecOp.getOperand(0).getValueType();
17885     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
17886       return SDValue();
17887     if (NumElts != BCVT.getVectorNumElements())
17888       BCNumEltsChanged = true;
17889     VecOp = VecOp.getOperand(0);
17890     ExtVT = BCVT.getVectorElementType();
17891   }
17892 
17893   // extract (vector load $addr), i --> load $addr + i * size
17894   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
17895       ISD::isNormalLoad(VecOp.getNode()) &&
17896       !Index->hasPredecessor(VecOp.getNode())) {
17897     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
17898     if (VecLoad && VecLoad->isSimple())
17899       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
17900   }
17901 
17902   // Perform only after legalization to ensure build_vector / vector_shuffle
17903   // optimizations have already been done.
17904   if (!LegalOperations || !IndexC)
17905     return SDValue();
17906 
17907   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
17908   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
17909   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
17910   int Elt = IndexC->getZExtValue();
17911   LoadSDNode *LN0 = nullptr;
17912   if (ISD::isNormalLoad(VecOp.getNode())) {
17913     LN0 = cast<LoadSDNode>(VecOp);
17914   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17915              VecOp.getOperand(0).getValueType() == ExtVT &&
17916              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
17917     // Don't duplicate a load with other uses.
17918     if (!VecOp.hasOneUse())
17919       return SDValue();
17920 
17921     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17922   }
17923   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17924     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17925     // =>
17926     // (load $addr+1*size)
17927 
17928     // Don't duplicate a load with other uses.
17929     if (!VecOp.hasOneUse())
17930       return SDValue();
17931 
17932     // If the bit convert changed the number of elements, it is unsafe
17933     // to examine the mask.
17934     if (BCNumEltsChanged)
17935       return SDValue();
17936 
17937     // Select the input vector, guarding against an out-of-range extract index.
17938     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
17939     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
17940 
17941     if (VecOp.getOpcode() == ISD::BITCAST) {
17942       // Don't duplicate a load with other uses.
17943       if (!VecOp.hasOneUse())
17944         return SDValue();
17945 
17946       VecOp = VecOp.getOperand(0);
17947     }
17948     if (ISD::isNormalLoad(VecOp.getNode())) {
17949       LN0 = cast<LoadSDNode>(VecOp);
17950       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17951       Index = DAG.getConstant(Elt, DL, Index.getValueType());
17952     }
17953   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
17954              VecVT.getVectorElementType() == ScalarVT &&
17955              (!LegalTypes ||
17956               TLI.isTypeLegal(
17957                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
17958     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
17959     //      -> extract_vector_elt a, 0
17960     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
17961     //      -> extract_vector_elt a, 1
17962     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
17963     //      -> extract_vector_elt b, 0
17964     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
17965     //      -> extract_vector_elt b, 1
17966     SDLoc SL(N);
17967     EVT ConcatVT = VecOp.getOperand(0).getValueType();
17968     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17969     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
17970                                      Index.getValueType());
17971 
17972     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
17973     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
17974                               ConcatVT.getVectorElementType(),
17975                               ConcatOp, NewIdx);
17976     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
17977   }
17978 
17979   // Make sure we found a simple (non-volatile, non-atomic) load and that the
17980   // extractelement is its only use.
17981   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
17982     return SDValue();
17983 
17984   // If Idx was -1 above, Elt is going to be -1, so just return undef.
17985   if (Elt == -1)
17986     return DAG.getUNDEF(LVT);
17987 
17988   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17989 }
17990 
17991 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
17992 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17993   // We perform this optimization post type-legalization because
17994   // the type-legalizer often scalarizes integer-promoted vectors.
17995   // Performing this optimization before may create bit-casts which
17996   // will be type-legalized to complex code sequences.
17997   // We perform this optimization only before the operation legalizer because we
17998   // may introduce illegal operations.
17999   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18000     return SDValue();
18001 
18002   unsigned NumInScalars = N->getNumOperands();
18003   SDLoc DL(N);
18004   EVT VT = N->getValueType(0);
18005 
18006   // Check to see if this is a BUILD_VECTOR of a bunch of values
18007   // which come from any_extend or zero_extend nodes. If so, we can create
18008   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18009   // optimizations. We do not handle sign-extend because we can't fill the sign
18010   // using shuffles.
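        // For example, on a little-endian target:
        //   (v4i32 build_vector (i32 zext i16:a), (i32 zext i16:b),
        //                       (i32 zext i16:c), (i32 zext i16:d))
        //     -> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))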
18011   EVT SourceType = MVT::Other;
18012   bool AllAnyExt = true;
18013 
18014   for (unsigned i = 0; i != NumInScalars; ++i) {
18015     SDValue In = N->getOperand(i);
18016     // Ignore undef inputs.
18017     if (In.isUndef()) continue;
18018 
18019     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
18020     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18021 
18022     // Abort if the element is not an extension.
18023     if (!ZeroExt && !AnyExt) {
18024       SourceType = MVT::Other;
18025       break;
18026     }
18027 
18028     // The input is a ZeroExt or AnyExt. Check the original type.
18029     EVT InTy = In.getOperand(0).getValueType();
18030 
18031     // Check that all of the widened source types are the same.
18032     if (SourceType == MVT::Other)
18033       // First time.
18034       SourceType = InTy;
18035     else if (InTy != SourceType) {
18036       // Multiple incoming types. Abort.
18037       SourceType = MVT::Other;
18038       break;
18039     }
18040 
18041     // Check if all of the extends are ANY_EXTENDs.
18042     AllAnyExt &= AnyExt;
18043   }
18044 
18045   // In order to have valid types, all of the inputs must be extended from the
18046   // same source type and all of the inputs must be any or zero extend.
18047   // Scalar sizes must be a power of two.
18048   EVT OutScalarTy = VT.getScalarType();
18049   bool ValidTypes = SourceType != MVT::Other &&
18050                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
18051                  isPowerOf2_32(SourceType.getSizeInBits());
18052 
18053   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
18054   // turn into a single shuffle instruction.
18055   if (!ValidTypes)
18056     return SDValue();
18057 
18058   // If we already have a splat buildvector, then don't fold it if it means
18059   // introducing zeros.
18060   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
18061     return SDValue();
18062 
18063   bool isLE = DAG.getDataLayout().isLittleEndian();
18064   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
18065   assert(ElemRatio > 1 && "Invalid element size ratio");
18066   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
18067                                DAG.getConstant(0, DL, SourceType);
18068 
18069   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
18070   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
18071 
18072   // Populate the new build_vector
18073   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18074     SDValue Cast = N->getOperand(i);
18075     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
18076             Cast.getOpcode() == ISD::ZERO_EXTEND ||
18077             Cast.isUndef()) && "Invalid cast opcode");
18078     SDValue In;
18079     if (Cast.isUndef())
18080       In = DAG.getUNDEF(SourceType);
18081     else
18082       In = Cast->getOperand(0);
18083     unsigned Index = isLE ? (i * ElemRatio) :
18084                             (i * ElemRatio + (ElemRatio - 1));
18085 
18086     assert(Index < Ops.size() && "Invalid index");
18087     Ops[Index] = In;
18088   }
18089 
18090   // The type of the new BUILD_VECTOR node.
18091   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
18092   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
18093          "Invalid vector size");
18094   // Check if the new vector type is legal.
18095   if (!isTypeLegal(VecVT) ||
18096       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
18097        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
18098     return SDValue();
18099 
18100   // Make the new BUILD_VECTOR.
18101   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
18102 
18103   // The new BUILD_VECTOR node has the potential to be further optimized.
18104   AddToWorklist(BV.getNode());
18105   // Bitcast to the desired type.
18106   return DAG.getBitcast(VT, BV);
18107 }
18108 
18109 // Simplify (build_vec (trunc $1)
18110 //                     (trunc (srl $1 half-width))
18111 //                     (trunc (srl $1 (2 * half-width))) …)
18112 // to (bitcast $1)
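      // For example, on a little-endian target:
      //   (v4i16 build_vector (i16 trunc i64:x),
      //                       (i16 trunc (i64 srl x, 16)),
      //                       (i16 trunc (i64 srl x, 32)),
      //                       (i16 trunc (i64 srl x, 48)))
      //     -> (v4i16 bitcast i64:x)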
18113 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18114   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18115 
18116   // Only for little endian
18117   if (!DAG.getDataLayout().isLittleEndian())
18118     return SDValue();
18119 
18120   SDLoc DL(N);
18121   EVT VT = N->getValueType(0);
18122   EVT OutScalarTy = VT.getScalarType();
18123   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18124 
18125   // Only for power of two types to be sure that bitcast works well
18126   if (!isPowerOf2_64(ScalarTypeBitsize))
18127     return SDValue();
18128 
18129   unsigned NumInScalars = N->getNumOperands();
18130 
18131   // Look through bitcasts
18132   auto PeekThroughBitcast = [](SDValue Op) {
18133     if (Op.getOpcode() == ISD::BITCAST)
18134       return Op.getOperand(0);
18135     return Op;
18136   };
18137 
18138   // The source value where all the parts are extracted.
18139   SDValue Src;
18140   for (unsigned i = 0; i != NumInScalars; ++i) {
18141     SDValue In = PeekThroughBitcast(N->getOperand(i));
18142     // Ignore undef inputs.
18143     if (In.isUndef()) continue;
18144 
18145     if (In.getOpcode() != ISD::TRUNCATE)
18146       return SDValue();
18147 
18148     In = PeekThroughBitcast(In.getOperand(0));
18149 
18150     if (In.getOpcode() != ISD::SRL) {
18151       // For now, only handle a build_vec without shuffling; handle shifts
18152       // here in the future.
18153       if (i != 0)
18154         return SDValue();
18155 
18156       Src = In;
18157     } else {
18158       // In is SRL
18159       SDValue part = PeekThroughBitcast(In.getOperand(0));
18160 
18161       if (!Src) {
18162         Src = part;
18163       } else if (Src != part) {
18164         // Vector parts do not stem from the same variable
18165         return SDValue();
18166       }
18167 
18168       SDValue ShiftAmtVal = In.getOperand(1);
18169       if (!isa<ConstantSDNode>(ShiftAmtVal))
18170         return SDValue();
18171 
18172       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
18173 
18174       // The extracted value is not extracted at the right position
18175       if (ShiftAmt != i * ScalarTypeBitsize)
18176         return SDValue();
18177     }
18178   }
18179 
18180   // Only cast if the size is the same
18181   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
18182     return SDValue();
18183 
18184   return DAG.getBitcast(VT, Src);
18185 }
18186 
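      // Shuffle the pair of input vectors selected by LeftIdx so that the
      // extracted elements recorded in VectorMask land in their positions in
      // the BUILD_VECTOR result type, concatenating, splitting, or padding the
      // inputs as needed to make the shuffle types match.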
18187 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
18188                                            ArrayRef<int> VectorMask,
18189                                            SDValue VecIn1, SDValue VecIn2,
18190                                            unsigned LeftIdx, bool DidSplitVec) {
18191   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18192 
18193   EVT VT = N->getValueType(0);
18194   EVT InVT1 = VecIn1.getValueType();
18195   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
18196 
18197   unsigned NumElems = VT.getVectorNumElements();
18198   unsigned ShuffleNumElems = NumElems;
18199 
18200   // If we artificially split a vector in two already, then the offsets in the
18201   // operands will all be based off of VecIn1, even those in VecIn2.
18202   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
18203 
18204   // We can't generate a shuffle node with mismatched input and output types.
18205   // Try to make the types match the type of the output.
18206   if (InVT1 != VT || InVT2 != VT) {
18207     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
18208       // If the output vector length is a multiple of both input lengths,
18209       // we can concatenate them and pad the rest with undefs.
18210       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
18211       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
18212       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
18213       ConcatOps[0] = VecIn1;
18214       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
18215       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18216       VecIn2 = SDValue();
18217     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
18218       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
18219         return SDValue();
18220 
18221       if (!VecIn2.getNode()) {
18222         // If we only have one input vector, and it's twice the size of the
18223         // output, split it in two.
18224         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
18225                              DAG.getVectorIdxConstant(NumElems, DL));
18226         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
18227         // Since we now have shorter input vectors, adjust the offset of the
18228         // second vector's start.
18229         Vec2Offset = NumElems;
18230       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
18231         // VecIn1 is wider than the output, and we have another, possibly
18232         // smaller input. Pad the smaller input with undefs, shuffle at the
18233         // input vector width, and extract the output.
18234         // The shuffle type is different from VT, so check legality again.
18235         if (LegalOperations &&
18236             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
18237           return SDValue();
18238 
18239         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
18240         // lower it back into a BUILD_VECTOR. So if the inserted type is
18241         // illegal, don't even try.
18242         if (InVT1 != InVT2) {
18243           if (!TLI.isTypeLegal(InVT2))
18244             return SDValue();
18245           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
18246                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
18247         }
18248         ShuffleNumElems = NumElems * 2;
18249       } else {
18250         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
18251         // than VecIn1. We can't handle this for now - this case will disappear
18252         // when we start sorting the vectors by type.
18253         return SDValue();
18254       }
18255     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
18256                InVT1.getSizeInBits() == VT.getSizeInBits()) {
18257       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
18258       ConcatOps[0] = VecIn2;
18259       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18260     } else {
18261       // TODO: Support cases where the length mismatch isn't exactly by a
18262       // factor of 2.
18263       // TODO: Move this check upwards, so that if we have bad type
18264       // mismatches, we don't create any DAG nodes.
18265       return SDValue();
18266     }
18267   }
18268 
18269   // Initialize mask to undef.
18270   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
18271 
18272   // Only need to run up to the number of elements actually used, not the
18273   // total number of elements in the shuffle - if we are shuffling a wider
18274   // vector, the high lanes should be set to undef.
18275   for (unsigned i = 0; i != NumElems; ++i) {
18276     if (VectorMask[i] <= 0)
18277       continue;
18278 
18279     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
18280     if (VectorMask[i] == (int)LeftIdx) {
18281       Mask[i] = ExtIndex;
18282     } else if (VectorMask[i] == (int)LeftIdx + 1) {
18283       Mask[i] = Vec2Offset + ExtIndex;
18284     }
18285   }
18286 
18287   // The types of the input vectors may have changed above.
18288   InVT1 = VecIn1.getValueType();
18289 
18290   // If we already have a VecIn2, it should have the same type as VecIn1.
18291   // If we don't, get an undef/zero vector of the appropriate type.
18292   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
18293   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
18294 
18295   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
18296   if (ShuffleNumElems > NumElems)
18297     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
18298 
18299   return Shuffle;
18300 }
18301 
18302 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
18303   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18304 
18305   // First, determine where the build vector is not undef.
18306   // TODO: We could extend this to handle zero elements as well as undefs.
18307   int NumBVOps = BV->getNumOperands();
18308   int ZextElt = -1;
18309   for (int i = 0; i != NumBVOps; ++i) {
18310     SDValue Op = BV->getOperand(i);
18311     if (Op.isUndef())
18312       continue;
18313     if (ZextElt == -1)
18314       ZextElt = i;
18315     else
18316       return SDValue();
18317   }
18318   // Bail out if there's no non-undef element.
18319   if (ZextElt == -1)
18320     return SDValue();
18321 
18322   // The build vector contains some number of undef elements and exactly
18323   // one other element. That other element must be a zero-extended scalar
18324   // extracted from a vector at a constant index to turn this into a shuffle.
18325   // Also, require that the build vector does not implicitly truncate/extend
18326   // its elements.
18327   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
18328   EVT VT = BV->getValueType(0);
18329   SDValue Zext = BV->getOperand(ZextElt);
18330   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
18331       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18332       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
18333       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
18334     return SDValue();
18335 
18336   // The zero-extend must be a multiple of the source size, and we must be
18337   // building a vector of the same size as the source of the extract element.
18338   SDValue Extract = Zext.getOperand(0);
18339   unsigned DestSize = Zext.getValueSizeInBits();
18340   unsigned SrcSize = Extract.getValueSizeInBits();
18341   if (DestSize % SrcSize != 0 ||
18342       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
18343     return SDValue();
18344 
18345   // Create a shuffle mask that will combine the extracted element with zeros
18346   // and undefs.
18347   int ZextRatio = DestSize / SrcSize;
18348   int NumMaskElts = NumBVOps * ZextRatio;
18349   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
18350   for (int i = 0; i != NumMaskElts; ++i) {
18351     if (i / ZextRatio == ZextElt) {
18352       // The low bits of the (potentially translated) extracted element map to
18353       // the source vector. The high bits map to zero. We will use a zero vector
18354       // as the 2nd source operand of the shuffle, so use the 1st element of
18355       // that vector (mask value is number-of-elements) for the high bits.
18356       if (i % ZextRatio == 0)
18357         ShufMask[i] = Extract.getConstantOperandVal(1);
18358       else
18359         ShufMask[i] = NumMaskElts;
18360     }
18361 
18362     // Undef elements of the build vector remain undef because we initialize
18363     // the shuffle mask with -1.
18364   }
18365 
18366   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
18367   // bitcast (shuffle V, ZeroVec, VectorMask)
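        // For example, with ZextRatio == 2, ZextElt == 1, and IndexC == 3:
        //   (v4i32 build_vector undef, (i32 zext (i16 extractelt v8i16:V, 3)),
        //                       undef, undef)
        //     -> (v4i32 bitcast (v8i16 shuffle V, zero, <u,u,3,8,u,u,u,u>))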
18368   SDLoc DL(BV);
18369   EVT VecVT = Extract.getOperand(0).getValueType();
18370   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
18371   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18372   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
18373                                              ZeroVec, ShufMask, DAG);
18374   if (!Shuf)
18375     return SDValue();
18376   return DAG.getBitcast(VT, Shuf);
18377 }
18378 
18379 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
18380 // operations. If the types of the vectors we're extracting from allow it,
18381 // turn this into a vector_shuffle node.
18382 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
18383   SDLoc DL(N);
18384   EVT VT = N->getValueType(0);
18385 
18386   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
18387   if (!isTypeLegal(VT))
18388     return SDValue();
18389 
18390   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
18391     return V;
18392 
18393   // May only combine to shuffle after legalize if shuffle is legal.
18394   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
18395     return SDValue();
18396 
18397   bool UsesZeroVector = false;
18398   unsigned NumElems = N->getNumOperands();
18399 
18400   // Record, for each element of the newly built vector, which input vector
18401   // that element comes from. -1 stands for undef, 0 for the zero vector,
18402   // and positive values for the input vectors.
18403   // VectorMask maps each element to its vector number, and VecIn maps vector
18404   // numbers to their initial SDValues.
18405 
18406   SmallVector<int, 8> VectorMask(NumElems, -1);
18407   SmallVector<SDValue, 8> VecIn;
18408   VecIn.push_back(SDValue());
18409 
18410   for (unsigned i = 0; i != NumElems; ++i) {
18411     SDValue Op = N->getOperand(i);
18412 
18413     if (Op.isUndef())
18414       continue;
18415 
18416     // See if we can use a blend with a zero vector.
18417     // TODO: Should we generalize this to a blend with an arbitrary constant
18418     // vector?
18419     if (isNullConstant(Op) || isNullFPConstant(Op)) {
18420       UsesZeroVector = true;
18421       VectorMask[i] = 0;
18422       continue;
18423     }
18424 
18425     // Not an undef or zero. If the input is something other than an
18426     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
18427     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18428         !isa<ConstantSDNode>(Op.getOperand(1)))
18429       return SDValue();
18430     SDValue ExtractedFromVec = Op.getOperand(0);
18431 
18432     if (ExtractedFromVec.getValueType().isScalableVector())
18433       return SDValue();
18434 
18435     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
18436     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
18437       return SDValue();
18438 
18439     // All inputs must have the same element type as the output.
18440     if (VT.getVectorElementType() !=
18441         ExtractedFromVec.getValueType().getVectorElementType())
18442       return SDValue();
18443 
18444     // Have we seen this input vector before?
18445     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
18446     // a map back from SDValues to numbers isn't worth it.
18447     unsigned Idx = std::distance(
18448         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
18449     if (Idx == VecIn.size())
18450       VecIn.push_back(ExtractedFromVec);
18451 
18452     VectorMask[i] = Idx;
18453   }
18454 
18455   // If we didn't find at least one input vector, bail out.
18456   if (VecIn.size() < 2)
18457     return SDValue();
18458 
18459   // If all the operands of the BUILD_VECTOR extract from the same
18460   // vector, then split the vector efficiently based on the maximum
18461   // vector access index and adjust the VectorMask and
18462   // VecIn accordingly.
18463   bool DidSplitVec = false;
18464   if (VecIn.size() == 2) {
18465     unsigned MaxIndex = 0;
18466     unsigned NearestPow2 = 0;
18467     SDValue Vec = VecIn.back();
18468     EVT InVT = Vec.getValueType();
18469     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
18470 
18471     for (unsigned i = 0; i < NumElems; i++) {
18472       if (VectorMask[i] <= 0)
18473         continue;
18474       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
18475       IndexVec[i] = Index;
18476       MaxIndex = std::max(MaxIndex, Index);
18477     }
18478 
18479     NearestPow2 = PowerOf2Ceil(MaxIndex);
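          // Heuristic: only split when the input vector is much wider than the
          // result, so that each half is still a useful shuffle source and the
          // narrower SplitVT has a chance of being legal.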
18480     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
18481         NumElems * 2 < NearestPow2) {
18482       unsigned SplitSize = NearestPow2 / 2;
18483       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
18484                                      InVT.getVectorElementType(), SplitSize);
18485       if (TLI.isTypeLegal(SplitVT)) {
18486         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18487                                      DAG.getVectorIdxConstant(SplitSize, DL));
18488         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18489                                      DAG.getVectorIdxConstant(0, DL));
18490         VecIn.pop_back();
18491         VecIn.push_back(VecIn1);
18492         VecIn.push_back(VecIn2);
18493         DidSplitVec = true;
18494 
18495         for (unsigned i = 0; i < NumElems; i++) {
18496           if (VectorMask[i] <= 0)
18497             continue;
18498           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
18499         }
18500       }
18501     }
18502   }
18503 
18504   // TODO: We want to sort the vectors by descending length, so that adjacent
18505   // pairs have similar length, and the longer vector is always first in the
18506   // pair.
18507 
18508   // TODO: Should this fire if some of the input vectors has illegal type (like
18509   // it does now), or should we let legalization run its course first?
18510 
18511   // Shuffle phase:
18512   // Take pairs of vectors, and shuffle them so that the result has elements
18513   // from these vectors in the correct places.
18514   // For example, given:
18515   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
18516   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
18517   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
18518   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
18519   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
18520   // We will generate:
18521   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
18522   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
18523   SmallVector<SDValue, 4> Shuffles;
18524   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
18525     unsigned LeftIdx = 2 * In + 1;
18526     SDValue VecLeft = VecIn[LeftIdx];
18527     SDValue VecRight =
18528         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
18529 
18530     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
18531                                                 VecRight, LeftIdx, DidSplitVec))
18532       Shuffles.push_back(Shuffle);
18533     else
18534       return SDValue();
18535   }
18536 
18537   // If we need the zero vector as an "ingredient" in the blend tree, add it
18538   // to the list of shuffles.
18539   if (UsesZeroVector)
18540     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
18541                                       : DAG.getConstantFP(0.0, DL, VT));
18542 
18543   // If we only have one shuffle, we're done.
18544   if (Shuffles.size() == 1)
18545     return Shuffles[0];
18546 
18547   // Update the vector mask to point to the post-shuffle vectors.
18548   for (int &Vec : VectorMask)
18549     if (Vec == 0)
18550       Vec = Shuffles.size() - 1;
18551     else
18552       Vec = (Vec - 1) / 2;
18553 
18554   // More than one shuffle. Generate a binary tree of blends, e.g. if from
18555   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
18556   // generate:
18557   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
18558   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
18559   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
18560   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
18561   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
18562   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
18563   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
18564 
18565   // Make sure the initial size of the shuffle list is even.
18566   if (Shuffles.size() % 2)
18567     Shuffles.push_back(DAG.getUNDEF(VT));
18568 
18569   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
18570     if (CurSize % 2) {
18571       Shuffles[CurSize] = DAG.getUNDEF(VT);
18572       CurSize++;
18573     }
18574     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
18575       int Left = 2 * In;
18576       int Right = 2 * In + 1;
18577       SmallVector<int, 8> Mask(NumElems, -1);
18578       for (unsigned i = 0; i != NumElems; ++i) {
18579         if (VectorMask[i] == Left) {
18580           Mask[i] = i;
18581           VectorMask[i] = In;
18582         } else if (VectorMask[i] == Right) {
18583           Mask[i] = i + NumElems;
18584           VectorMask[i] = In;
18585         }
18586       }
18587 
18588       Shuffles[In] =
18589           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
18590     }
18591   }
18592   return Shuffles[0];
18593 }
18594 
18595 // Try to turn a build vector of zero extends of extract vector elts into
18596 // a vector zero extend and possibly an extract subvector.
18597 // TODO: Support sign extend?
18598 // TODO: Allow undef elements?
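      // For example:
      //   (v4i32 build_vector (i32 zext (i16 extractelt v8i16:X, 4)),
      //                       (i32 zext (i16 extractelt v8i16:X, 5)),
      //                       (i32 zext (i16 extractelt v8i16:X, 6)),
      //                       (i32 zext (i16 extractelt v8i16:X, 7)))
      //     -> (v4i32 zero_extend (v4i16 extract_subvector v8i16:X, 4))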
18599 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
18600   if (LegalOperations)
18601     return SDValue();
18602 
18603   EVT VT = N->getValueType(0);
18604 
18605   bool FoundZeroExtend = false;
18606   SDValue Op0 = N->getOperand(0);
18607   auto checkElem = [&](SDValue Op) -> int64_t {
18608     unsigned Opc = Op.getOpcode();
18609     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
18610     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
18611         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18612         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
18613       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
18614         return C->getZExtValue();
18615     return -1;
18616   };
18617 
18618   // Make sure the first element matches
18619   // (zext (extract_vector_elt X, C))
18620   int64_t Offset = checkElem(Op0);
18621   if (Offset < 0)
18622     return SDValue();
18623 
18624   unsigned NumElems = N->getNumOperands();
18625   SDValue In = Op0.getOperand(0).getOperand(0);
18626   EVT InSVT = In.getValueType().getScalarType();
18627   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
18628 
18629   // Don't create an illegal input type after type legalization.
18630   if (LegalTypes && !TLI.isTypeLegal(InVT))
18631     return SDValue();
18632 
18633   // Ensure all the elements come from the same vector and are adjacent.
18634   for (unsigned i = 1; i != NumElems; ++i) {
18635     if ((Offset + i) != checkElem(N->getOperand(i)))
18636       return SDValue();
18637   }
18638 
18639   SDLoc DL(N);
18640   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
18641                    Op0.getOperand(0).getOperand(1));
18642   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
18643                      VT, In);
18644 }
18645 
18646 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
18647   EVT VT = N->getValueType(0);
18648 
18649   // A vector built entirely of undefs is undef.
18650   if (ISD::allOperandsUndef(N))
18651     return DAG.getUNDEF(VT);
18652 
18653   // If this is a splat of a bitcast from another vector, change to a
18654   // concat_vector.
18655   // For example:
18656   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
18657   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
18658   //
18659   // If X is a build_vector itself, the concat can become a larger build_vector.
18660   // TODO: Maybe this is useful for non-splat too?
18661   if (!LegalOperations) {
18662     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
18663       Splat = peekThroughBitcasts(Splat);
18664       EVT SrcVT = Splat.getValueType();
18665       if (SrcVT.isVector()) {
18666         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
18667         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
18668                                      SrcVT.getVectorElementType(), NumElts);
18669         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
18670           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
18671           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
18672                                        NewVT, Ops);
18673           return DAG.getBitcast(VT, Concat);
18674         }
18675       }
18676     }
18677   }
18678 
18679   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
18680   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
18681     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
18682       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
18683       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
18684     }
18685 
18686   // Check if we can express the BUILD_VECTOR via a subvector extract.
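        // For example:
        //   (v2i64 build_vector (i64 extractelt v4i64:X, 2),
        //                       (i64 extractelt v4i64:X, 3))
        //     -> (v2i64 extract_subvector v4i64:X, 2)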
18687   if (!LegalTypes && (N->getNumOperands() > 1)) {
18688     SDValue Op0 = N->getOperand(0);
18689     auto checkElem = [&](SDValue Op) -> uint64_t {
18690       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
18691           (Op0.getOperand(0) == Op.getOperand(0)))
18692         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
18693           return CNode->getZExtValue();
18694       return -1;
18695     };
18696 
18697     int Offset = checkElem(Op0);
18698     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
18699       if (Offset + i != checkElem(N->getOperand(i))) {
18700         Offset = -1;
18701         break;
18702       }
18703     }
18704 
18705     if ((Offset == 0) &&
18706         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
18707       return Op0.getOperand(0);
18708     if ((Offset != -1) &&
18709         ((Offset % N->getValueType(0).getVectorNumElements()) ==
18710          0)) // IDX must be a multiple of the output size.
18711       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
18712                          Op0.getOperand(0), Op0.getOperand(1));
18713   }
18714 
18715   if (SDValue V = convertBuildVecZextToZext(N))
18716     return V;
18717 
18718   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
18719     return V;
18720 
18721   if (SDValue V = reduceBuildVecTruncToBitCast(N))
18722     return V;
18723 
18724   if (SDValue V = reduceBuildVecToShuffle(N))
18725     return V;
18726 
18727   return SDValue();
18728 }
18729 
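      // Fold a CONCAT_VECTORS of scalar-to-vector bitcasts (or undefs) into one
      // wide BUILD_VECTOR of the scalars, bitcast to the result type.
      // For example, on a target where v2f32 is not legal:
      //   (v4f32 concat_vectors (v2f32 bitcast f64:a), (v2f32 bitcast f64:b))
      //     -> (v4f32 bitcast (v2f64 build_vector a, b))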
18730 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
18731   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18732   EVT OpVT = N->getOperand(0).getValueType();
18733 
18734   // If the operands are legal vectors, leave them alone.
18735   if (TLI.isTypeLegal(OpVT))
18736     return SDValue();
18737 
18738   SDLoc DL(N);
18739   EVT VT = N->getValueType(0);
18740   SmallVector<SDValue, 8> Ops;
18741 
18742   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
18743   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18744 
18745   // Keep track of what we encounter.
18746   bool AnyInteger = false;
18747   bool AnyFP = false;
18748   for (const SDValue &Op : N->ops()) {
18749     if (ISD::BITCAST == Op.getOpcode() &&
18750         !Op.getOperand(0).getValueType().isVector())
18751       Ops.push_back(Op.getOperand(0));
18752     else if (ISD::UNDEF == Op.getOpcode())
18753       Ops.push_back(ScalarUndef);
18754     else
18755       return SDValue();
18756 
18757     // Note whether we encounter an integer or floating point scalar.
18758     // If it's neither, bail out; it could be something weird like x86mmx.
18759     EVT LastOpVT = Ops.back().getValueType();
18760     if (LastOpVT.isFloatingPoint())
18761       AnyFP = true;
18762     else if (LastOpVT.isInteger())
18763       AnyInteger = true;
18764     else
18765       return SDValue();
18766   }
18767 
18768   // If any of the operands is a floating point scalar bitcast to a vector,
18769   // use floating point types throughout, and bitcast everything.
18770   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
18771   if (AnyFP) {
18772     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
18773     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18774     if (AnyInteger) {
18775       for (SDValue &Op : Ops) {
18776         if (Op.getValueType() == SVT)
18777           continue;
18778         if (Op.isUndef())
18779           Op = ScalarUndef;
18780         else
18781           Op = DAG.getBitcast(SVT, Op);
18782       }
18783     }
18784   }
18785 
18786   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
18787                                VT.getSizeInBits() / SVT.getSizeInBits());
18788   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
18789 }
18790 
18791 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
18792 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
18793 // most two distinct vectors the same size as the result, attempt to turn this
18794 // into a legal shuffle.
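      // For example:
      //   (v8i32 concat_vectors (v4i32 extract_subvector v8i32:X, 0),
      //                         (v4i32 extract_subvector v8i32:Y, 4))
      //     -> (v8i32 vector_shuffle<0,1,2,3,12,13,14,15> X, Y)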
18795 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
18796   EVT VT = N->getValueType(0);
18797   EVT OpVT = N->getOperand(0).getValueType();
18798   int NumElts = VT.getVectorNumElements();
18799   int NumOpElts = OpVT.getVectorNumElements();
18800 
18801   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
18802   SmallVector<int, 8> Mask;
18803 
18804   for (SDValue Op : N->ops()) {
18805     Op = peekThroughBitcasts(Op);
18806 
18807     // UNDEF nodes convert to UNDEF shuffle mask values.
18808     if (Op.isUndef()) {
18809       Mask.append((unsigned)NumOpElts, -1);
18810       continue;
18811     }
18812 
18813     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18814       return SDValue();
18815 
18816     // What vector are we extracting the subvector from and at what index?
18817     SDValue ExtVec = Op.getOperand(0);
18818     int ExtIdx = Op.getConstantOperandVal(1);
18819 
18820     // We want the EVT of the original extraction to correctly scale the
18821     // extraction index.
18822     EVT ExtVT = ExtVec.getValueType();
18823     ExtVec = peekThroughBitcasts(ExtVec);
18824 
18825     // UNDEF nodes convert to UNDEF shuffle mask values.
18826     if (ExtVec.isUndef()) {
18827       Mask.append((unsigned)NumOpElts, -1);
18828       continue;
18829     }
18830 
18831     // Ensure that we are extracting a subvector from a vector the same
18832     // size as the result.
18833     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
18834       return SDValue();
18835 
18836     // Scale the subvector index to account for any bitcast.
18837     int NumExtElts = ExtVT.getVectorNumElements();
18838     if (0 == (NumExtElts % NumElts))
18839       ExtIdx /= (NumExtElts / NumElts);
18840     else if (0 == (NumElts % NumExtElts))
18841       ExtIdx *= (NumElts / NumExtElts);
18842     else
18843       return SDValue();
18844 
18845     // At most we can reference 2 inputs in the final shuffle.
18846     if (SV0.isUndef() || SV0 == ExtVec) {
18847       SV0 = ExtVec;
18848       for (int i = 0; i != NumOpElts; ++i)
18849         Mask.push_back(i + ExtIdx);
18850     } else if (SV1.isUndef() || SV1 == ExtVec) {
18851       SV1 = ExtVec;
18852       for (int i = 0; i != NumOpElts; ++i)
18853         Mask.push_back(i + ExtIdx + NumElts);
18854     } else {
18855       return SDValue();
18856     }
18857   }
18858 
18859   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18860   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
18861                                      DAG.getBitcast(VT, SV1), Mask, DAG);
18862 }
18863 
18864 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
18865   unsigned CastOpcode = N->getOperand(0).getOpcode();
18866   switch (CastOpcode) {
18867   case ISD::SINT_TO_FP:
18868   case ISD::UINT_TO_FP:
18869   case ISD::FP_TO_SINT:
18870   case ISD::FP_TO_UINT:
18871     // TODO: Allow more opcodes?
18872     //  case ISD::BITCAST:
18873     //  case ISD::TRUNCATE:
18874     //  case ISD::ZERO_EXTEND:
18875     //  case ISD::SIGN_EXTEND:
18876     //  case ISD::FP_EXTEND:
18877     break;
18878   default:
18879     return SDValue();
18880   }
18881 
18882   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
18883   if (!SrcVT.isVector())
18884     return SDValue();
18885 
18886   // All operands of the concat must be the same kind of cast from the same
18887   // source type.
18888   SmallVector<SDValue, 4> SrcOps;
18889   for (SDValue Op : N->ops()) {
18890     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
18891         Op.getOperand(0).getValueType() != SrcVT)
18892       return SDValue();
18893     SrcOps.push_back(Op.getOperand(0));
18894   }
18895 
18896   // The wider cast must be supported by the target. This is unusual because
18897   // the type used to check operation support depends on the opcode. In addition,
18898   // check the other type in the cast to make sure this is really legal.
18899   EVT VT = N->getValueType(0);
18900   EVT SrcEltVT = SrcVT.getVectorElementType();
18901   unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
18902   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
18903   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18904   switch (CastOpcode) {
18905   case ISD::SINT_TO_FP:
18906   case ISD::UINT_TO_FP:
18907     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
18908         !TLI.isTypeLegal(VT))
18909       return SDValue();
18910     break;
18911   case ISD::FP_TO_SINT:
18912   case ISD::FP_TO_UINT:
18913     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
18914         !TLI.isTypeLegal(ConcatSrcVT))
18915       return SDValue();
18916     break;
18917   default:
18918     llvm_unreachable("Unexpected cast opcode");
18919   }
18920 
18921   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
18922   SDLoc DL(N);
18923   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
18924   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
18925 }
18926 
18927 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
18928   // If we only have one input vector, we don't need to do any concatenation.
18929   if (N->getNumOperands() == 1)
18930     return N->getOperand(0);
18931 
18932   // Check if all of the operands are undefs.
18933   EVT VT = N->getValueType(0);
18934   if (ISD::allOperandsUndef(N))
18935     return DAG.getUNDEF(VT);
18936 
18937   // Optimize concat_vectors where all but the first of the vectors are undef.
18938   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
18939         return Op.isUndef();
18940       })) {
18941     SDValue In = N->getOperand(0);
18942     assert(In.getValueType().isVector() && "Must concat vectors");
18943 
18944     // If the input is a concat_vectors, just make a larger concat by padding
18945     // with smaller undefs.
18946     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
18947       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
18948       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
18949       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
18950       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18951     }
18952 
18953     SDValue Scalar = peekThroughOneUseBitcasts(In);
18954 
18955     // concat_vectors(scalar_to_vector(scalar), undef) ->
18956     //     scalar_to_vector(scalar)
18957     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18958          Scalar.hasOneUse()) {
18959       EVT SVT = Scalar.getValueType().getVectorElementType();
18960       if (SVT == Scalar.getOperand(0).getValueType())
18961         Scalar = Scalar.getOperand(0);
18962     }
18963 
18964     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
18965     if (!Scalar.getValueType().isVector()) {
18966       // If the bitcast type isn't legal, it might be a trunc of a legal type;
18967       // look through the trunc so we can still do the transform:
18968       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
18969       if (Scalar->getOpcode() == ISD::TRUNCATE &&
18970           !TLI.isTypeLegal(Scalar.getValueType()) &&
18971           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
18972         Scalar = Scalar->getOperand(0);
18973 
18974       EVT SclTy = Scalar.getValueType();
18975 
18976       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
18977         return SDValue();
18978 
18979       // Bail out if the vector size is not a multiple of the scalar size.
18980       if (VT.getSizeInBits() % SclTy.getSizeInBits())
18981         return SDValue();
18982 
18983       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
18984       if (VNTNumElms < 2)
18985         return SDValue();
18986 
18987       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
18988       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
18989         return SDValue();
18990 
18991       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
18992       return DAG.getBitcast(VT, Res);
18993     }
18994   }
18995 
18996   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
18997   // We have already tested above for an UNDEF only concatenation.
18998   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
18999   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
19000   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
19001     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
19002   };
19003   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
19004     SmallVector<SDValue, 8> Opnds;
19005     EVT SVT = VT.getScalarType();
19006 
19007     EVT MinVT = SVT;
19008     if (!SVT.isFloatingPoint()) {
19009       // If the BUILD_VECTORs are built from integers, they may have different
19010       // operand types. Get the smallest type and truncate all operands to it.
19011       bool FoundMinVT = false;
19012       for (const SDValue &Op : N->ops())
19013         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19014           EVT OpSVT = Op.getOperand(0).getValueType();
19015           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
19016           FoundMinVT = true;
19017         }
19018       assert(FoundMinVT && "Concat vector type mismatch");
19019     }
19020 
19021     for (const SDValue &Op : N->ops()) {
19022       EVT OpVT = Op.getValueType();
19023       unsigned NumElts = OpVT.getVectorNumElements();
19024 
19025       if (ISD::UNDEF == Op.getOpcode())
19026         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
19027 
19028       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19029         if (SVT.isFloatingPoint()) {
19030           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
19031           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
19032         } else {
19033           for (unsigned i = 0; i != NumElts; ++i)
19034             Opnds.push_back(
19035                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
19036         }
19037       }
19038     }
19039 
19040     assert(VT.getVectorNumElements() == Opnds.size() &&
19041            "Concat vector type mismatch");
19042     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
19043   }
19044 
19045   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
19046   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
19047     return V;
19048 
19049   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
19050   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
19051     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
19052       return V;
19053 
19054   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
19055     return V;
19056 
19057   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
19058   // nodes often generate nop CONCAT_VECTOR nodes.
19059   // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
19060   // place the incoming vectors at the exact same location.
19061   SDValue SingleSource = SDValue();
19062   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
19063 
19064   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19065     SDValue Op = N->getOperand(i);
19066 
19067     if (Op.isUndef())
19068       continue;
19069 
19070     // Check if this is the identity extract:
19071     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19072       return SDValue();
19073 
19074     // Find the single incoming vector for the extract_subvector.
19075     if (SingleSource.getNode()) {
19076       if (Op.getOperand(0) != SingleSource)
19077         return SDValue();
19078     } else {
19079       SingleSource = Op.getOperand(0);
19080 
19081       // Check that the source type is the same as the type of the result.
19082       // If not, this concat may extend the vector, so we cannot
19083       // optimize it away.
19084       if (SingleSource.getValueType() != N->getValueType(0))
19085         return SDValue();
19086     }
19087 
19088     // Check that we are reading from the identity index.
19089     unsigned IdentityIndex = i * PartNumElem;
19090     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
19091       return SDValue();
19092   }
19093 
19094   if (SingleSource.getNode())
19095     return SingleSource;
19096 
19097   return SDValue();
19098 }
19099 
19100 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19101 // if the subvector can be sourced for free.
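      // For example, with SubVT == v4i32 and Index == 4:
      //   (v8i32 concat_vectors t1, t2) sources the subvector as t2, and
      //   (v8i32 insert_subvector ?, X, 4) sources it as X.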
19102 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
19103   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19104       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19105     return V.getOperand(1);
19106   }
19107   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19108   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19109       V.getOperand(0).getValueType() == SubVT &&
19110       (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
19111     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
19112     return V.getOperand(SubIdx);
19113   }
19114   return SDValue();
19115 }
19116 
19117 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19118                                               SelectionDAG &DAG) {
19119   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19120   SDValue BinOp = Extract->getOperand(0);
19121   unsigned BinOpcode = BinOp.getOpcode();
19122   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
19123     return SDValue();
19124 
19125   EVT VecVT = BinOp.getValueType();
19126   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
19127   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
19128     return SDValue();
19129 
19130   SDValue Index = Extract->getOperand(1);
19131   EVT SubVT = Extract->getValueType(0);
19132   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
19133     return SDValue();
19134 
19135   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19136   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19137 
19138   // TODO: We could handle the case where only 1 operand is being inserted by
19139   //       creating an extract of the other operand, but that requires checking
19140   //       number of uses and/or costs.
19141   if (!Sub0 || !Sub1)
19142     return SDValue();
19143 
19144   // We are inserting both operands of the wide binop only to extract back
19145   // to the narrow vector size. Eliminate all of the insert/extract:
19146   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
19147   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19148                      BinOp->getFlags());
19149 }
19150 
19151 /// If we are extracting a subvector produced by a wide binary operator, try
19152 /// to use a narrow binary operator and/or avoid concatenation and extraction.
19153 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
19154   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
19155   // some of these bailouts with other transforms.
19156 
19157   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
19158     return V;
19159 
19160   // The extract index must be a constant, so we can map it to a concat operand.
19161   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19162   if (!ExtractIndexC)
19163     return SDValue();
19164 
19165   // We are looking for an optionally bitcasted wide vector binary operator
19166   // feeding an extract subvector.
19167   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19168   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
19169   unsigned BOpcode = BinOp.getOpcode();
19170   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
19171     return SDValue();
19172 
19173   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
19174   // reduced to the unary fneg when it is visited, and we probably want to deal
19175   // with fneg in a target-specific way.
19176   if (BOpcode == ISD::FSUB) {
19177     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
19178     if (C && C->getValueAPF().isNegZero())
19179       return SDValue();
19180   }
19181 
19182   // The binop must be a vector type, so we can extract some fraction of it.
19183   EVT WideBVT = BinOp.getValueType();
19184   if (!WideBVT.isVector())
19185     return SDValue();
19186 
19187   EVT VT = Extract->getValueType(0);
19188   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
19189   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
19190          "Extract index is not a multiple of the vector length.");
19191 
19192   // Bail out if this is not a proper multiple width extraction.
19193   unsigned WideWidth = WideBVT.getSizeInBits();
19194   unsigned NarrowWidth = VT.getSizeInBits();
19195   if (WideWidth % NarrowWidth != 0)
19196     return SDValue();
19197 
19198   // Bail out if we are extracting a fraction of a single operation. This can
19199   // occur because we potentially looked through a bitcast of the binop.
19200   unsigned NarrowingRatio = WideWidth / NarrowWidth;
19201   unsigned WideNumElts = WideBVT.getVectorNumElements();
19202   if (WideNumElts % NarrowingRatio != 0)
19203     return SDValue();
19204 
19205   // Bail out if the target does not support a narrower version of the binop.
19206   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
19207                                    WideNumElts / NarrowingRatio);
19208   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
19209     return SDValue();
19210 
19211   // If extraction is cheap, we don't need to look at the binop operands
19212   // for concat ops. The narrow binop alone makes this transform profitable.
19213   // We can't just reuse the original extract index operand because we may have
19214   // bitcasted.
19215   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
19216   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
19217   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
19218       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
19219     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
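          // For example (with illustrative types):
          //   v4i32 extract (v8i32 and B0, B1), 4
          //     --> v4i32 and (v4i32 extract B0, 4), (v4i32 extract B1, 4)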
19220     SDLoc DL(Extract);
19221     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19222     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19223                             BinOp.getOperand(0), NewExtIndex);
19224     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19225                             BinOp.getOperand(1), NewExtIndex);
19226     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
19227                                       BinOp.getNode()->getFlags());
19228     return DAG.getBitcast(VT, NarrowBinOp);
19229   }
19230 
19231   // Only handle the case where we are doubling and then halving. A larger ratio
19232   // may require more than two narrow binops to replace the wide binop.
19233   if (NarrowingRatio != 2)
19234     return SDValue();
19235 
19236   // TODO: The motivating case for this transform is an x86 AVX1 target. That
19237   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
19238   // flavors, but no other 256-bit integer support. This could be extended to
19239   // handle any binop, but that may require fixing/adding other folds to avoid
19240   // codegen regressions.
19241   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
19242     return SDValue();
19243 
19244   // We need at least one concatenation operation of a binop operand to make
19245   // this transform worthwhile. The concat must double the input vector sizes.
19246   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
19247     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
19248       return V.getOperand(ConcatOpNum);
19249     return SDValue();
19250   };
19251   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
19252   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
19253 
19254   if (SubVecL || SubVecR) {
19255     // If a binop operand was not the result of a concat, we must extract a
19256     // half-sized operand for our new narrow binop:
19257     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
19258     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
19259     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
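          // For example (with illustrative AVX1-style types):
          //   v4i32 extract (v8i32 xor (concat X1, X2), Y), 4
          //     --> v4i32 xor X2, (v4i32 extract Y, 4)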
19260     SDLoc DL(Extract);
19261     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19262     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
19263                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19264                                       BinOp.getOperand(0), IndexC);
19265 
19266     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
19267                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19268                                       BinOp.getOperand(1), IndexC);
19269 
19270     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
19271     return DAG.getBitcast(VT, NarrowBinOp);
19272   }
19273 
19274   return SDValue();
19275 }
19276 
19277 /// If we are extracting a subvector from a wide vector load, convert to a
19278 /// narrow load to eliminate the extraction:
19279 /// (extract_subvector (load wide vector)) --> (load narrow vector)
19280 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
19281   // TODO: Add support for big-endian. The offset calculation must be adjusted.
19282   if (DAG.getDataLayout().isBigEndian())
19283     return SDValue();
19284 
19285   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
19286   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19287   if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
19288       !ExtIdx)
19289     return SDValue();
19290 
19291   // Allow targets to opt out (see the shouldReduceLoadWidth check below).
19292   EVT VT = Extract->getValueType(0);
19293 
19294   // We can only create byte sized loads.
19295   if (!VT.isByteSized())
19296     return SDValue();
19297 
19298   unsigned Index = ExtIdx->getZExtValue();
19299   unsigned NumElts = VT.getVectorNumElements();
19300 
19301   // If the index is a multiple of the extract element count, we can offset the
19302   // address by the store size multiplied by the subvector index. Otherwise if
19303   // the scalar type is byte sized, we can just use the index multiplied by
19304   // the element size in bytes as the offset.
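        // For example (illustrative): extracting v2i32 at index 4 from a wide
        // v8i32 load gives Offset = (4 / 2) * 8 bytes = 16, while index 3 uses
        // the byte-sized-scalar case: Offset = 3 * 4 bytes = 12.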
19305   unsigned Offset;
19306   if (Index % NumElts == 0)
19307     Offset = (Index / NumElts) * VT.getStoreSize();
19308   else if (VT.getScalarType().isByteSized())
19309     Offset = Index * VT.getScalarType().getStoreSize();
19310   else
19311     return SDValue();
19312 
19313   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19314   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
19315     return SDValue();
19316 
19317   // The narrow load will be offset from the base address of the old load if
19318   // we are extracting from something besides index 0 (little-endian).
19319   SDLoc DL(Extract);
19320   SDValue BaseAddr = Ld->getBasePtr();
19321 
19322   // TODO: Use "BaseIndexOffset" to make this more effective.
19323   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
19324   MachineFunction &MF = DAG.getMachineFunction();
19325   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
19326                                                    VT.getStoreSize());
19327   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
19328   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
19329   return NewLd;
19330 }
19331 
19332 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
19333   EVT NVT = N->getValueType(0);
19334   SDValue V = N->getOperand(0);
19335   uint64_t ExtIdx = N->getConstantOperandVal(1);
19336 
19337   // Extract from UNDEF is UNDEF.
19338   if (V.isUndef())
19339     return DAG.getUNDEF(NVT);
19340 
19341   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
19342     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
19343       return NarrowLoad;
19344 
19345   // Combine an extract of an extract into a single extract_subvector.
19346   // ext (ext X, C), 0 --> ext X, C
19347   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
19348     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
19349                                     V.getConstantOperandVal(1)) &&
19350         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
19351       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
19352                          V.getOperand(1));
19353     }
19354   }
19355 
19356   // Try to move vector bitcast after extract_subv by scaling extraction index:
19357   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
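        // For example (with illustrative types):
        //   v2i32 extract_subv (v4i32 bitcast (v8i16 X)), 2
        //     --> v2i32 bitcast (v4i16 extract_subv X, 4)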
19358   if (V.getOpcode() == ISD::BITCAST &&
19359       V.getOperand(0).getValueType().isVector()) {
19360     SDValue SrcOp = V.getOperand(0);
19361     EVT SrcVT = SrcOp.getValueType();
19362     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
19363     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
19364     if ((SrcNumElts % DestNumElts) == 0) {
19365       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
19366       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
19367       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
19368                                       NewExtEC);
19369       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19370         SDLoc DL(N);
19371         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
19372         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19373                                          V.getOperand(0), NewIndex);
19374         return DAG.getBitcast(NVT, NewExtract);
19375       }
19376     }
19377     if ((DestNumElts % SrcNumElts) == 0) {
19378       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
19379       if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
19380         ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
19381         EVT ScalarVT = SrcVT.getScalarType();
19382         if ((ExtIdx % DestSrcRatio) == 0) {
19383           SDLoc DL(N);
19384           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
19385           EVT NewExtVT =
19386               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
19387           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19388             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19389             SDValue NewExtract =
19390                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19391                             V.getOperand(0), NewIndex);
19392             return DAG.getBitcast(NVT, NewExtract);
19393           }
19394           if (NewExtEC == 1 &&
19395               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
19396             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19397             SDValue NewExtract =
19398                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
19399                             V.getOperand(0), NewIndex);
19400             return DAG.getBitcast(NVT, NewExtract);
19401           }
19402         }
19403       }
19404     }
19405   }
19406 
19407   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
19408     unsigned ExtNumElts = NVT.getVectorMinNumElements();
19409     EVT ConcatSrcVT = V.getOperand(0).getValueType();
19410     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
19411            "Concat and extract subvector do not change element type");
19412     assert((ExtIdx % ExtNumElts) == 0 &&
19413            "Extract index is not a multiple of the input vector length.");
19414 
19415     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
19416     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
19417 
19418     // If the concatenated source types match this extract, it's a direct
19419     // simplification:
19420     // extract_subvec (concat V1, V2, ...), i --> Vi
19421     if (ConcatSrcNumElts == ExtNumElts)
19422       return V.getOperand(ConcatOpIdx);
19423 
19424     // If the concatenated source vectors are a multiple length of this extract,
19425     // then extract a fraction of one of those source vectors directly from a
19426     // concat operand. Example:
19427   //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
19428     //   v2i8 extract_subvec v8i8 Y, 6
19429     if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
19430       SDLoc DL(N);
19431       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
19432       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
19433              "Trying to extract from >1 concat operand?");
19434       assert(NewExtIdx % ExtNumElts == 0 &&
19435              "Extract index is not a multiple of the input vector length.");
19436       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
19437       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
19438                          V.getOperand(ConcatOpIdx), NewIndexC);
19439     }
19440   }
19441 
19442   V = peekThroughBitcasts(V);
19443 
19444   // If the input is a build vector, try to make a smaller build vector.
19445   if (V.getOpcode() == ISD::BUILD_VECTOR) {
19446     EVT InVT = V.getValueType();
19447     unsigned ExtractSize = NVT.getSizeInBits();
19448     unsigned EltSize = InVT.getScalarSizeInBits();
19449     // Only do this if we won't split any elements.
19450     if (ExtractSize % EltSize == 0) {
19451       unsigned NumElems = ExtractSize / EltSize;
19452       EVT EltVT = InVT.getVectorElementType();
19453       EVT ExtractVT =
19454           NumElems == 1 ? EltVT
19455                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
19456       if ((Level < AfterLegalizeDAG ||
19457            (NumElems == 1 ||
19458             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
19459           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
19460         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
19461 
19462         if (NumElems == 1) {
19463           SDValue Src = V->getOperand(IdxVal);
19464           if (EltVT != Src.getValueType())
19465             Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
19466           return DAG.getBitcast(NVT, Src);
19467         }
19468 
19469         // Extract the pieces from the original build_vector.
19470         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
19471                                               V->ops().slice(IdxVal, NumElems));
19472         return DAG.getBitcast(NVT, BuildVec);
19473       }
19474     }
19475   }
19476 
19477   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
19478     // Handle only simple case where vector being inserted and vector
19479     // being extracted are of same size.
19480     EVT SmallVT = V.getOperand(1).getValueType();
19481     if (!NVT.bitsEq(SmallVT))
19482       return SDValue();
19483 
19484     // Combine:
19485     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
19486     // Into:
19487     //    indices are equal or bit offsets are equal => V1
19488     //    otherwise => (extract_subvec V1, ExtIdx)
19489     uint64_t InsIdx = V.getConstantOperandVal(2);
19490     if (InsIdx * SmallVT.getScalarSizeInBits() ==
19491         ExtIdx * NVT.getScalarSizeInBits())
19492       return DAG.getBitcast(NVT, V.getOperand(1));
19493     return DAG.getNode(
19494         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
19495         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
19496         N->getOperand(1));
19497   }
19498 
19499   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
19500     return NarrowBOp;
19501 
19502   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19503     return SDValue(N, 0);
19504 
19505   return SDValue();
19506 }
19507 
19508 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
19509 /// followed by concatenation. Narrow vector ops may have better performance
19510 /// than wide ops, and this can unlock further narrowing of other vector ops.
19511 /// Targets can invert this transform later if it is not profitable.
19512 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
19513                                          SelectionDAG &DAG) {
19514   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
19515   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
19516       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
19517       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
19518     return SDValue();
19519 
19520   // Split the wide shuffle mask into halves. Any mask element that is accessing
19521   // operand 1 is offset down to account for narrowing of the vectors.
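        // For example (illustrative): a v8i16 mask <0,8,1,9,2,10,3,11> splits
        // into Mask0 = <0,4,1,5> and Mask1 = <2,6,3,7> for the v4i16 shuffles.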
19522   ArrayRef<int> Mask = Shuf->getMask();
19523   EVT VT = Shuf->getValueType(0);
19524   unsigned NumElts = VT.getVectorNumElements();
19525   unsigned HalfNumElts = NumElts / 2;
19526   SmallVector<int, 16> Mask0(HalfNumElts, -1);
19527   SmallVector<int, 16> Mask1(HalfNumElts, -1);
19528   for (unsigned i = 0; i != NumElts; ++i) {
19529     if (Mask[i] == -1)
19530       continue;
19531     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
19532     if (i < HalfNumElts)
19533       Mask0[i] = M;
19534     else
19535       Mask1[i - HalfNumElts] = M;
19536   }
19537 
19538   // Ask the target if this is a valid transform.
19539   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19540   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
19541                                 HalfNumElts);
19542   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
19543       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
19544     return SDValue();
19545 
19546   // shuffle (concat X, undef), (concat Y, undef), Mask -->
19547   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
19548   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
19549   SDLoc DL(Shuf);
19550   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
19551   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
19552   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
19553 }
19554 
19555 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
19556 // or to turn a shuffle of a single concat into a simpler shuffle, then a concat.
19557 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
19558   EVT VT = N->getValueType(0);
19559   unsigned NumElts = VT.getVectorNumElements();
19560 
19561   SDValue N0 = N->getOperand(0);
19562   SDValue N1 = N->getOperand(1);
19563   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
19564   ArrayRef<int> Mask = SVN->getMask();
19565 
19566   SmallVector<SDValue, 4> Ops;
19567   EVT ConcatVT = N0.getOperand(0).getValueType();
19568   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
19569   unsigned NumConcats = NumElts / NumElemsPerConcat;
19570 
19571   auto IsUndefMaskElt = [](int i) { return i == -1; };
19572 
19573   // Special case: shuffle(concat(A,B)) can be more efficiently represented
19574   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
19575   // half vector elements.
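        // For example (illustrative):
        //   v8i16 shuffle (concat A, B), undef, <1,5,2,6,u,u,u,u>
        //     --> concat (v4i16 shuffle A, B, <1,5,2,6>), undef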
19576   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
19577       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
19578                    IsUndefMaskElt)) {
19579     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
19580                               N0.getOperand(1),
19581                               Mask.slice(0, NumElemsPerConcat));
19582     N1 = DAG.getUNDEF(ConcatVT);
19583     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
19584   }
19585 
19586   // Look at every vector that's inserted. We're looking for exact
19587   // subvector-sized copies from a concatenated vector.
19588   for (unsigned I = 0; I != NumConcats; ++I) {
19589     unsigned Begin = I * NumElemsPerConcat;
19590     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
19591 
19592     // Make sure we're dealing with a copy.
19593     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
19594       Ops.push_back(DAG.getUNDEF(ConcatVT));
19595       continue;
19596     }
19597 
19598     int OpIdx = -1;
19599     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
19600       if (IsUndefMaskElt(SubMask[i]))
19601         continue;
19602       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
19603         return SDValue();
19604       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
19605       if (0 <= OpIdx && EltOpIdx != OpIdx)
19606         return SDValue();
19607       OpIdx = EltOpIdx;
19608     }
19609     assert(0 <= OpIdx && "Unknown concat_vectors op");
19610 
19611     if (OpIdx < (int)N0.getNumOperands())
19612       Ops.push_back(N0.getOperand(OpIdx));
19613     else
19614       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
19615   }
19616 
19617   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19618 }
19619 
19620 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
19621 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
19622 //
19623 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
19624 // a simplification in some sense, but it isn't appropriate in general: some
19625 // BUILD_VECTORs are substantially cheaper than others. The general case
19626 // of a BUILD_VECTOR requires inserting each element individually (or
19627 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
19628 // all constants is a single constant pool load.  A BUILD_VECTOR where each
19629 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
19630 // are undef lowers to a small number of element insertions.
19631 //
19632 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
19633 // We don't fold shuffles where one side is a non-zero constant, and we don't
19634 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
19635 // non-constant operands. This seems to work out reasonably well in practice.
19636 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
19637                                        SelectionDAG &DAG,
19638                                        const TargetLowering &TLI) {
19639   EVT VT = SVN->getValueType(0);
19640   unsigned NumElts = VT.getVectorNumElements();
19641   SDValue N0 = SVN->getOperand(0);
19642   SDValue N1 = SVN->getOperand(1);
19643 
19644   if (!N0->hasOneUse())
19645     return SDValue();
19646 
19647   // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
19648   // discussed above.
19649   if (!N1.isUndef()) {
19650     if (!N1->hasOneUse())
19651       return SDValue();
19652 
19653     bool N0AnyConst = isAnyConstantBuildVector(N0);
19654     bool N1AnyConst = isAnyConstantBuildVector(N1);
19655     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
19656       return SDValue();
19657     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
19658       return SDValue();
19659   }
19660 
19661   // If both inputs are splats of the same value then we can safely merge this
19662   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
19663   bool IsSplat = false;
19664   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
19665   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
19666   if (BV0 && BV1)
19667     if (SDValue Splat0 = BV0->getSplatValue())
19668       IsSplat = (Splat0 == BV1->getSplatValue());
19669 
19670   SmallVector<SDValue, 8> Ops;
19671   SmallSet<SDValue, 16> DuplicateOps;
19672   for (int M : SVN->getMask()) {
19673     SDValue Op = DAG.getUNDEF(VT.getScalarType());
19674     if (M >= 0) {
19675       int Idx = M < (int)NumElts ? M : M - NumElts;
19676       SDValue &S = (M < (int)NumElts ? N0 : N1);
19677       if (S.getOpcode() == ISD::BUILD_VECTOR) {
19678         Op = S.getOperand(Idx);
19679       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19680         SDValue Op0 = S.getOperand(0);
19681         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
19682       } else {
19683         // Operand can't be combined - bail out.
19684         return SDValue();
19685       }
19686     }
19687 
19688     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
19689     // generating a splat; semantically, this is fine, but it's likely to
19690     // generate low-quality code if the target can't reconstruct an appropriate
19691     // shuffle.
19692     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
19693       if (!IsSplat && !DuplicateOps.insert(Op).second)
19694         return SDValue();
19695 
19696     Ops.push_back(Op);
19697   }
19698 
19699   // BUILD_VECTOR requires all inputs to be of the same type, find the
19700   // maximum type and extend them all.
19701   EVT SVT = VT.getScalarType();
19702   if (SVT.isInteger())
19703     for (SDValue &Op : Ops)
19704       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
19705   if (SVT != VT.getScalarType())
19706     for (SDValue &Op : Ops)
19707       Op = TLI.isZExtFree(Op.getValueType(), SVT)
19708                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
19709                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
19710   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
19711 }
19712 
19713 // Match shuffles that can be converted to any_vector_extend_in_reg.
19714 // This is often generated during legalization.
19715 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
19716 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
19717 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
19718                                             SelectionDAG &DAG,
19719                                             const TargetLowering &TLI,
19720                                             bool LegalOperations) {
19721   EVT VT = SVN->getValueType(0);
19722   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19723 
19724   // TODO Add support for big-endian when we have a test case.
19725   if (!VT.isInteger() || IsBigEndian)
19726     return SDValue();
19727 
19728   unsigned NumElts = VT.getVectorNumElements();
19729   unsigned EltSizeInBits = VT.getScalarSizeInBits();
19730   ArrayRef<int> Mask = SVN->getMask();
19731   SDValue N0 = SVN->getOperand(0);
19732 
19733   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
19734   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
19735     for (unsigned i = 0; i != NumElts; ++i) {
19736       if (Mask[i] < 0)
19737         continue;
19738       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
19739         continue;
19740       return false;
19741     }
19742     return true;
19743   };
19744 
19745   // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
19746   // power-of-2 extensions as they are the most likely.
19747   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
19748     // Check for non power of 2 vector sizes
19749     if (NumElts % Scale != 0)
19750       continue;
19751     if (!isAnyExtend(Scale))
19752       continue;
19753 
19754     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
19755     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
19756     // Never create an illegal type. Only create unsupported operations if we
19757     // are pre-legalization.
19758     if (TLI.isTypeLegal(OutVT))
19759       if (!LegalOperations ||
19760           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
19761         return DAG.getBitcast(VT,
19762                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
19763                                           SDLoc(SVN), OutVT, N0));
19764   }
19765 
19766   return SDValue();
19767 }
19768 
19769 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
19770 // each source element of a large type into the lowest elements of a smaller
19771 // destination type. This is often generated during legalization.
19772 // If the source node itself was a '*_extend_vector_inreg' node then we should
19773 // then be able to remove it.
19774 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
19775                                         SelectionDAG &DAG) {
19776   EVT VT = SVN->getValueType(0);
19777   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19778 
19779   // TODO Add support for big-endian when we have a test case.
19780   if (!VT.isInteger() || IsBigEndian)
19781     return SDValue();
19782 
19783   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
19784 
19785   unsigned Opcode = N0.getOpcode();
19786   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
19787       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
19788       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
19789     return SDValue();
19790 
19791   SDValue N00 = N0.getOperand(0);
19792   ArrayRef<int> Mask = SVN->getMask();
19793   unsigned NumElts = VT.getVectorNumElements();
19794   unsigned EltSizeInBits = VT.getScalarSizeInBits();
19795   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
19796   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
19797 
19798   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
19799     return SDValue();
19800   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
19801 
19802   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
19803   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
19804   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
19805   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
19806     for (unsigned i = 0; i != NumElts; ++i) {
19807       if (Mask[i] < 0)
19808         continue;
19809       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
19810         continue;
19811       return false;
19812     }
19813     return true;
19814   };
19815 
19816   // At the moment we just handle the case where we've truncated back to the
19817   // same size as before the extension.
19818   // TODO: handle more extension/truncation cases as they arise.
19819   if (EltSizeInBits != ExtSrcSizeInBits)
19820     return SDValue();
19821 
19822   // We can remove *extend_vector_inreg only if the truncation happens at
19823   // the same scale as the extension.
19824   if (isTruncate(ExtScale))
19825     return DAG.getBitcast(VT, N00);
19826 
19827   return SDValue();
19828 }
19829 
19830 // Combine shuffles of splat-shuffles of the form:
19831 // shuffle (shuffle V, undef, splat-mask), undef, M
19832 // If splat-mask contains undef elements, we need to be careful about
19833 // introducing undef's in the folded mask which are not the result of composing
19834 // the masks of the shuffles.
19835 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
19836                                         SelectionDAG &DAG) {
19837   if (!Shuf->getOperand(1).isUndef())
19838     return SDValue();
19839   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
19840   if (!Splat || !Splat->isSplat())
19841     return SDValue();
19842 
19843   ArrayRef<int> ShufMask = Shuf->getMask();
19844   ArrayRef<int> SplatMask = Splat->getMask();
19845   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
19846 
19847   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
19848   // every undef mask element in the splat-shuffle has a corresponding undef
19849   // element in the user-shuffle's mask or if the composition of mask elements
19850   // would result in undef.
19851   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
19852   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
19853   //   In this case it is not legal to simplify to the splat-shuffle because we
19854   //   may be exposing to the users of the shuffle an undef element at index 1
19855   //   which was not there before the combine.
19856   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
19857   //   In this case the composition of masks yields SplatMask, so it's ok to
19858   //   simplify to the splat-shuffle.
19859   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
19860   //   In this case the composed mask includes all undef elements of SplatMask
19861   //   and in addition sets element zero to undef. It is safe to simplify to
19862   //   the splat-shuffle.
19863   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
19864                                        ArrayRef<int> SplatMask) {
19865     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
19866       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
19867           SplatMask[UserMask[i]] != -1)
19868         return false;
19869     return true;
19870   };
19871   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
19872     return Shuf->getOperand(0);
19873 
19874   // Create a new shuffle with a mask that is composed of the two shuffles'
19875   // masks.
19876   SmallVector<int, 32> NewMask;
19877   for (int Idx : ShufMask)
19878     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
19879 
19880   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
19881                               Splat->getOperand(0), Splat->getOperand(1),
19882                               NewMask);
19883 }
19884 
19885 /// Combine shuffle of shuffle of the form:
19886 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
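      /// For example (illustrative masks): OuterMask = <1,u,1,3> composed with
      /// InnerMask = <2,2,u,2> yields the splat mask <2,u,2,2> of element 2 of X.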
19887 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
19888                                      SelectionDAG &DAG) {
19889   if (!OuterShuf->getOperand(1).isUndef())
19890     return SDValue();
19891   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
19892   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
19893     return SDValue();
19894 
19895   ArrayRef<int> OuterMask = OuterShuf->getMask();
19896   ArrayRef<int> InnerMask = InnerShuf->getMask();
19897   unsigned NumElts = OuterMask.size();
19898   assert(NumElts == InnerMask.size() && "Mask length mismatch");
19899   SmallVector<int, 32> CombinedMask(NumElts, -1);
19900   int SplatIndex = -1;
19901   for (unsigned i = 0; i != NumElts; ++i) {
19902     // Undef lanes remain undef.
19903     int OuterMaskElt = OuterMask[i];
19904     if (OuterMaskElt == -1)
19905       continue;
19906 
19907     // Peek through the shuffle masks to get the underlying source element.
19908     int InnerMaskElt = InnerMask[OuterMaskElt];
19909     if (InnerMaskElt == -1)
19910       continue;
19911 
19912     // Initialize the splatted element.
19913     if (SplatIndex == -1)
19914       SplatIndex = InnerMaskElt;
19915 
19916     // Non-matching index - this is not a splat.
19917     if (SplatIndex != InnerMaskElt)
19918       return SDValue();
19919 
19920     CombinedMask[i] = InnerMaskElt;
19921   }
19922   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
19923           getSplatIndex(CombinedMask) != -1) &&
19924          "Expected a splat mask");
19925 
19926   // TODO: The transform may be a win even if the mask is not legal.
19927   EVT VT = OuterShuf->getValueType(0);
19928   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
19929   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
19930     return SDValue();
19931 
19932   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
19933                               InnerShuf->getOperand(1), CombinedMask);
19934 }
19935 
19936 /// If the shuffle mask is taking exactly one element from the first vector
19937 /// operand and passing through all other elements from the second vector
19938 /// operand, return the index of the mask element that is choosing an element
19939 /// from the first operand. Otherwise, return -1.
19940 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
19941   int MaskSize = Mask.size();
19942   int EltFromOp0 = -1;
19943   // TODO: This does not match if there are undef elements in the shuffle mask.
19944   // Should we ignore undefs in the shuffle mask instead? The trade-off is
19945   // removing an instruction (a shuffle), but losing the knowledge that some
19946   // vector lanes are not needed.
19947   for (int i = 0; i != MaskSize; ++i) {
19948     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
19949       // We're looking for a shuffle of exactly one element from operand 0.
19950       if (EltFromOp0 != -1)
19951         return -1;
19952       EltFromOp0 = i;
19953     } else if (Mask[i] != i + MaskSize) {
19954       // Nothing from operand 1 can change lanes.
19955       return -1;
19956     }
19957   }
19958   return EltFromOp0;
19959 }
19960 
19961 /// If a shuffle inserts exactly one element from a source vector operand into
19962 /// another vector operand and we can access the specified element as a scalar,
19963 /// then we can eliminate the shuffle.
19964 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
19965                                       SelectionDAG &DAG) {
19966   // First, check if we are taking one element of a vector and shuffling that
19967   // element into another vector.
19968   ArrayRef<int> Mask = Shuf->getMask();
19969   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
19970   SDValue Op0 = Shuf->getOperand(0);
19971   SDValue Op1 = Shuf->getOperand(1);
19972   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
19973   if (ShufOp0Index == -1) {
19974     // Commute mask and check again.
19975     ShuffleVectorSDNode::commuteMask(CommutedMask);
19976     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
19977     if (ShufOp0Index == -1)
19978       return SDValue();
19979     // Commute operands to match the commuted shuffle mask.
19980     std::swap(Op0, Op1);
19981     Mask = CommutedMask;
19982   }
19983 
19984   // The shuffle inserts exactly one element from operand 0 into operand 1.
19985   // Now see if we can access that element as a scalar via a real insert element
19986   // instruction.
19987   // TODO: We can try harder to locate the element as a scalar. Examples: it
19988   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
19989   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
19990          "Shuffle mask value must be from operand 0");
19991   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
19992     return SDValue();
19993 
19994   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
19995   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
19996     return SDValue();
19997 
19998   // There's an existing insertelement with constant insertion index, so we
19999   // don't need to check the legality/profitability of a replacement operation
20000   // that differs at most in the constant value. The target should be able to
20001   // lower any of those in a similar way. If not, legalization will expand this
20002   // to a scalar-to-vector plus shuffle.
20003   //
20004   // Note that the shuffle may move the scalar from the position that the insert
20005   // element used. Therefore, our new insert element occurs at the shuffle's
20006   // mask index value, not the insert's index value.
20007   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
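        // For example (illustrative): shuffle (insertelt v1, x, 0), v2, <4,0,6,7>
        //   --> insertelt v2, x, 1 (the scalar lands at the shuffle's mask index).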
20008   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
20009   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
20010                      Op1, Op0.getOperand(1), NewInsIndex);
20011 }
20012 
20013 /// If we have a unary shuffle of a shuffle, see if it can be folded away
20014 /// completely. This has the potential to lose undef knowledge because the first
20015 /// shuffle may not have an undef mask element where the second one does. So
20016 /// only call this after doing simplifications based on demanded elements.
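      /// For example (illustrative masks): shuf (shuf0 X, Y, <0,0,2,2>), undef,
      /// <1,1,3,3> selects the same source elements as shuf0 alone, so the outer
      /// shuffle folds away to shuf0.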
20017 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20018   // shuf (shuf0 X, Y, Mask0), undef, Mask
20019   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20020   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
20021     return SDValue();
20022 
20023   ArrayRef<int> Mask = Shuf->getMask();
20024   ArrayRef<int> Mask0 = Shuf0->getMask();
20025   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20026     // Ignore undef elements.
20027     if (Mask[i] == -1)
20028       continue;
20029     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
20030 
20031     // Is the element of the shuffle operand chosen by this shuffle the same as
20032     // the element chosen by the shuffle operand itself?
20033     if (Mask0[Mask[i]] != Mask0[i])
20034       return SDValue();
20035   }
20036   // Every element of this shuffle is identical to the result of the previous
20037   // shuffle, so we can replace this value.
20038   return Shuf->getOperand(0);
20039 }
20040 
20041 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
20042   EVT VT = N->getValueType(0);
20043   unsigned NumElts = VT.getVectorNumElements();
20044 
20045   SDValue N0 = N->getOperand(0);
20046   SDValue N1 = N->getOperand(1);
20047 
20048   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
20049 
20050   // Canonicalize shuffle undef, undef -> undef
20051   if (N0.isUndef() && N1.isUndef())
20052     return DAG.getUNDEF(VT);
20053 
20054   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20055 
20056   // Canonicalize shuffle v, v -> v, undef
20057   if (N0 == N1) {
20058     SmallVector<int, 8> NewMask;
20059     for (unsigned i = 0; i != NumElts; ++i) {
20060       int Idx = SVN->getMaskElt(i);
20061       if (Idx >= (int)NumElts) Idx -= NumElts;
20062       NewMask.push_back(Idx);
20063     }
20064     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
20065   }
20066 
20067   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
20068   if (N0.isUndef())
20069     return DAG.getCommutedVectorShuffle(*SVN);
20070 
20071   // Remove references to rhs if it is undef
20072   if (N1.isUndef()) {
20073     bool Changed = false;
20074     SmallVector<int, 8> NewMask;
20075     for (unsigned i = 0; i != NumElts; ++i) {
20076       int Idx = SVN->getMaskElt(i);
20077       if (Idx >= (int)NumElts) {
20078         Idx = -1;
20079         Changed = true;
20080       }
20081       NewMask.push_back(Idx);
20082     }
20083     if (Changed)
20084       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
20085   }
20086 
20087   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
20088     return InsElt;
20089 
20090   // A shuffle of a single vector that is a splatted value can always be folded.
20091   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
20092     return V;
20093 
20094   if (SDValue V = formSplatFromShuffles(SVN, DAG))
20095     return V;
20096 
20097   // If it is a splat, check if the argument vector is another splat or a
20098   // build_vector.
20099   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
20100     int SplatIndex = SVN->getSplatIndex();
20101     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
20102         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
20103       // splat (vector_bo L, R), Index -->
20104       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
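            // For example (illustrative): splatting lane 2 of (v4f32 fadd L, R)
            // becomes a scalar fadd of (extelt L, 2) and (extelt R, 2), which is
            // then splatted back out to v4f32.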
20105       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
20106       SDLoc DL(N);
20107       EVT EltVT = VT.getScalarType();
20108       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
20109       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
20110       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
20111       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
20112                                   N0.getNode()->getFlags());
20113       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
20114       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
20115       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
20116     }
20117 
20118     // If this is a bit convert that changes the element type of the vector but
20119     // not the number of vector elements, look through it.  Be careful not to
20120     // look through conversions that change things like v4f32 to v2f64.
20121     SDNode *V = N0.getNode();
20122     if (V->getOpcode() == ISD::BITCAST) {
20123       SDValue ConvInput = V->getOperand(0);
20124       if (ConvInput.getValueType().isVector() &&
20125           ConvInput.getValueType().getVectorNumElements() == NumElts)
20126         V = ConvInput.getNode();
20127     }
20128 
20129     if (V->getOpcode() == ISD::BUILD_VECTOR) {
20130       assert(V->getNumOperands() == NumElts &&
20131              "BUILD_VECTOR has wrong number of operands");
20132       SDValue Base;
20133       bool AllSame = true;
20134       for (unsigned i = 0; i != NumElts; ++i) {
20135         if (!V->getOperand(i).isUndef()) {
20136           Base = V->getOperand(i);
20137           break;
20138         }
20139       }
20140       // Splat of <u, u, u, u>, return <u, u, u, u>
20141       if (!Base.getNode())
20142         return N0;
20143       for (unsigned i = 0; i != NumElts; ++i) {
20144         if (V->getOperand(i) != Base) {
20145           AllSame = false;
20146           break;
20147         }
20148       }
20149       // Splat of <x, x, x, x>, return <x, x, x, x>
20150       if (AllSame)
20151         return N0;
20152 
20153       // Canonicalize any other splat as a build_vector.
20154       SDValue Splatted = V->getOperand(SplatIndex);
20155       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
20156       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
20157 
20158       // We may have jumped through bitcasts, so the type of the
20159       // BUILD_VECTOR may not match the type of the shuffle.
20160       if (V->getValueType(0) != VT)
20161         NewBV = DAG.getBitcast(VT, NewBV);
20162       return NewBV;
20163     }
20164   }
20165 
20166   // Simplify source operands based on shuffle mask.
20167   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20168     return SDValue(N, 0);
20169 
20170   // This is intentionally placed after demanded elements simplification because
20171   // it could eliminate knowledge of undef elements created by this shuffle.
20172   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
20173     return ShufOp;
20174 
20175   // Match shuffles that can be converted to any_vector_extend_in_reg.
20176   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
20177     return V;
20178 
20179   // Combine "truncate_vector_in_reg" style shuffles.
20180   if (SDValue V = combineTruncationShuffle(SVN, DAG))
20181     return V;
20182 
20183   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
20184       Level < AfterLegalizeVectorOps &&
20185       (N1.isUndef() ||
20186       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
20187        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
20188     if (SDValue V = partitionShuffleOfConcats(N, DAG))
20189       return V;
20190   }
20191 
20192   // A shuffle of a concat of the same narrow vector can be reduced to use
20193   // only low-half elements of a concat with undef:
20194   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
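        // For example (illustrative): v8i16 shuf (concat X, X), undef,
        // <6,0,1,7,u,u,u,u> --> shuf (concat X, undef), undef, <2,0,1,3,u,u,u,u>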
20195   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
20196       N0.getNumOperands() == 2 &&
20197       N0.getOperand(0) == N0.getOperand(1)) {
20198     int HalfNumElts = (int)NumElts / 2;
20199     SmallVector<int, 8> NewMask;
20200     for (unsigned i = 0; i != NumElts; ++i) {
20201       int Idx = SVN->getMaskElt(i);
20202       if (Idx >= HalfNumElts) {
20203         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
20204         Idx -= HalfNumElts;
20205       }
20206       NewMask.push_back(Idx);
20207     }
20208     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
20209       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
20210       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
20211                                    N0.getOperand(0), UndefVec);
20212       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
20213     }
20214   }
20215 
20216   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20217   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20218   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
20219     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
20220       return Res;
20221 
20222   // If this shuffle only has a single input that is a bitcasted shuffle,
20223   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
20224   // back to their original types.
20225   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
20226       N1.isUndef() && Level < AfterLegalizeVectorOps &&
20227       TLI.isTypeLegal(VT)) {
20228 
20229     SDValue BC0 = peekThroughOneUseBitcasts(N0);
20230     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
20231       EVT SVT = VT.getScalarType();
20232       EVT InnerVT = BC0->getValueType(0);
20233       EVT InnerSVT = InnerVT.getScalarType();
20234 
20235       // Determine which shuffle works with the smaller scalar type.
20236       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
20237       EVT ScaleSVT = ScaleVT.getScalarType();
20238 
20239       if (TLI.isTypeLegal(ScaleVT) &&
20240           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
20241           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
20242         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
20243         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
20244 
20245         // Scale the shuffle masks to the smaller scalar type.
20246         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
20247         SmallVector<int, 8> InnerMask;
20248         SmallVector<int, 8> OuterMask;
20249         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
20250         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
20251 
20252         // Merge the shuffle masks.
20253         SmallVector<int, 8> NewMask;
20254         for (int M : OuterMask)
20255           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
20256 
20257         // Test for shuffle mask legality over both commutations.
20258         SDValue SV0 = BC0->getOperand(0);
20259         SDValue SV1 = BC0->getOperand(1);
20260         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
20261         if (!LegalMask) {
20262           std::swap(SV0, SV1);
20263           ShuffleVectorSDNode::commuteMask(NewMask);
20264           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
20265         }
20266 
20267         if (LegalMask) {
20268           SV0 = DAG.getBitcast(ScaleVT, SV0);
20269           SV1 = DAG.getBitcast(ScaleVT, SV1);
20270           return DAG.getBitcast(
20271               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
20272         }
20273       }
20274     }
20275   }
20276 
20277   // Canonicalize shuffles according to rules:
20278   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
20279   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
20280   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
20281   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
20282       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
20283       TLI.isTypeLegal(VT)) {
20284     // The incoming shuffle must be of the same type as the result of the
20285     // current shuffle.
20286     assert(N1->getOperand(0).getValueType() == VT &&
20287            "Shuffle types don't match");
20288 
20289     SDValue SV0 = N1->getOperand(0);
20290     SDValue SV1 = N1->getOperand(1);
20291     bool HasSameOp0 = N0 == SV0;
20292     bool IsSV1Undef = SV1.isUndef();
20293     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
20294       // Commute the operands of this shuffle so that the next rule
20295       // will trigger.
20296       return DAG.getCommutedVectorShuffle(*SVN);
20297   }
20298 
20299   // Try to fold according to rules:
20300   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
20301   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
20302   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
20303   // Don't try to fold shuffles with illegal type.
20304   // Only fold if this shuffle is the only user of the other shuffle.
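        // For example (illustrative masks, 4 elements):
        //   shuffle(shuffle(A, B, <0,5,2,7>), C, <0,2,4,6>)
        //     --> shuffle(A, C, <0,2,4,6>), since no element of B is referenced.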
20305   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
20306       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
20307     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
20308 
20309     // Don't try to fold splats; they're likely to simplify somehow, or they
20310     // might be free.
20311     if (OtherSV->isSplat())
20312       return SDValue();
20313 
20314     // The incoming shuffle must be of the same type as the result of the
20315     // current shuffle.
20316     assert(OtherSV->getOperand(0).getValueType() == VT &&
20317            "Shuffle types don't match");
20318 
20319     SDValue SV0, SV1;
20320     SmallVector<int, 4> Mask;
20321     // Compute the combined shuffle mask for a shuffle with SV0 as the first
20322     // operand, and SV1 as the second operand.
20323     for (unsigned i = 0; i != NumElts; ++i) {
20324       int Idx = SVN->getMaskElt(i);
20325       if (Idx < 0) {
20326         // Propagate Undef.
20327         Mask.push_back(Idx);
20328         continue;
20329       }
20330 
20331       SDValue CurrentVec;
20332       if (Idx < (int)NumElts) {
20333         // This shuffle index refers to the inner shuffle N0. Lookup the inner
20334         // shuffle mask to identify which vector is actually referenced.
20335         Idx = OtherSV->getMaskElt(Idx);
20336         if (Idx < 0) {
20337           // Propagate Undef.
20338           Mask.push_back(Idx);
20339           continue;
20340         }
20341 
20342         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
20343                                            : OtherSV->getOperand(1);
20344       } else {
20345         // This shuffle index references an element within N1.
20346         CurrentVec = N1;
20347       }
20348 
20349       // Simple case where 'CurrentVec' is UNDEF.
20350       if (CurrentVec.isUndef()) {
20351         Mask.push_back(-1);
20352         continue;
20353       }
20354 
20355       // Canonicalize the shuffle index. We don't know yet if CurrentVec
20356       // will be the first or second operand of the combined shuffle.
20357       Idx = Idx % NumElts;
20358       if (!SV0.getNode() || SV0 == CurrentVec) {
20359         // Ok. CurrentVec is the left hand side.
20360         // Update the mask accordingly.
20361         SV0 = CurrentVec;
20362         Mask.push_back(Idx);
20363         continue;
20364       }
20365 
20366       // Bail out if we cannot convert the shuffle pair into a single shuffle.
20367       if (SV1.getNode() && SV1 != CurrentVec)
20368         return SDValue();
20369 
20370       // Ok. CurrentVec is the right hand side.
20371       // Update the mask accordingly.
20372       SV1 = CurrentVec;
20373       Mask.push_back(Idx + NumElts);
20374     }
20375 
20376     // Check if all indices in Mask are Undef. If so, propagate Undef.
20377     bool isUndefMask = true;
20378     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
20379       isUndefMask &= Mask[i] < 0;
20380 
20381     if (isUndefMask)
20382       return DAG.getUNDEF(VT);
20383 
20384     if (!SV0.getNode())
20385       SV0 = DAG.getUNDEF(VT);
20386     if (!SV1.getNode())
20387       SV1 = DAG.getUNDEF(VT);
20388 
20389     // Avoid introducing shuffles with illegal mask.
20390     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
20391     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
20392     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
20393     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
20394     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
20395     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
20396     return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
20397   }
20398 
20399   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
20400     return V;
20401 
20402   return SDValue();
20403 }
20404 
20405 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
20406   SDValue InVal = N->getOperand(0);
20407   EVT VT = N->getValueType(0);
20408 
20409   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
20410   // with a VECTOR_SHUFFLE and possible truncate.
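        // For illustration (hypothetical types):
        //   scalar_to_vector (extract_vector_elt V:v4i32, 2)
        // becomes vector_shuffle V, undef, <2,-1,-1,-1> when the types match,
        // folding the extract/re-insert round trip into a single shuffle.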
20411   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20412       VT.isFixedLengthVector() &&
20413       InVal->getOperand(0).getValueType().isFixedLengthVector()) {
20414     SDValue InVec = InVal->getOperand(0);
20415     SDValue EltNo = InVal->getOperand(1);
20416     auto InVecT = InVec.getValueType();
20417     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
20418       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
20419       int Elt = C0->getZExtValue();
20420       NewMask[0] = Elt;
20421       // If we have an implicit truncate, do the truncate here as long as it's
20422       // legal; if it's not legal, the folds below cannot apply and we give up.
20423       if (VT.getScalarType() != InVal.getValueType() &&
20424           InVal.getValueType().isScalarInteger() &&
20425           isTypeLegal(VT.getScalarType())) {
20426         SDValue Val =
20427             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
20428         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
20429       }
20430       if (VT.getScalarType() == InVecT.getScalarType() &&
20431           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
20432         SDValue LegalShuffle =
20433           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
20434                                       DAG.getUNDEF(InVecT), NewMask, DAG);
20435         if (LegalShuffle) {
20436           // If the initial vector is the correct size, this shuffle is a
20437           // valid result.
20438           if (VT == InVecT)
20439             return LegalShuffle;
20440           // If not, we must truncate the vector.
20441           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
20442             SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
20443             EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
20444                                          InVecT.getVectorElementType(),
20445                                          VT.getVectorNumElements());
20446             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
20447                                LegalShuffle, ZeroIdx);
20448           }
20449         }
20450       }
20451     }
20452   }
20453 
20454   return SDValue();
20455 }
20456 
20457 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
20458   EVT VT = N->getValueType(0);
20459   SDValue N0 = N->getOperand(0);
20460   SDValue N1 = N->getOperand(1);
20461   SDValue N2 = N->getOperand(2);
20462   uint64_t InsIdx = N->getConstantOperandVal(2);
20463 
20464   // If inserting an UNDEF, just return the original vector.
20465   if (N1.isUndef())
20466     return N0;
20467 
20468   // If this is an insert of an extracted vector into an undef vector, we can
20469   // just use the input to the extract.
20470   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20471       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
20472     return N1.getOperand(0);
20473 
20474   // If we are inserting a bitcast value into an undef, with the same
20475   // number of elements, just use the bitcast input of the extract.
20476   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
20477   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
20478   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
20479       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20480       N1.getOperand(0).getOperand(1) == N2 &&
20481       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
20482           VT.getVectorNumElements() &&
20483       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
20484           VT.getSizeInBits()) {
20485     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
20486   }
20487 
20488   // If both N1 and N2 are bitcast values on which insert_subvector
20489   // would make sense, pull the bitcast through.
20490   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
20491   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
20492   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
20493     SDValue CN0 = N0.getOperand(0);
20494     SDValue CN1 = N1.getOperand(0);
20495     EVT CN0VT = CN0.getValueType();
20496     EVT CN1VT = CN1.getValueType();
20497     if (CN0VT.isVector() && CN1VT.isVector() &&
20498         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
20499         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
20500       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
20501                                       CN0.getValueType(), CN0, CN1, N2);
20502       return DAG.getBitcast(VT, NewINSERT);
20503     }
20504   }
20505 
20506   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
20507   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
20508   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
20509   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
20510       N0.getOperand(1).getValueType() == N1.getValueType() &&
20511       N0.getOperand(2) == N2)
20512     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
20513                        N1, N2);
20514 
20515   // Eliminate an intermediate insert into an undef vector:
20516   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
20517   // insert_subvector undef, X, N2
20518   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
20519       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
20520     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
20521                        N1.getOperand(1), N2);
20522 
20523   // Push subvector bitcasts to the output, adjusting the index as we go.
20524   // insert_subvector(bitcast(v), bitcast(s), c1)
20525   // -> bitcast(insert_subvector(v, s, c2))
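        // For illustration (hypothetical types): inserting
        // (bitcast v4i32 s to v2i64) into v4i64 v at index 2 becomes
        // bitcast(insert_subvector(bitcast(v to v8i32), s, 4)); the index is
        // rescaled by the ratio of the element sizes (64/32 = 2 here).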
20526   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
20527       N1.getOpcode() == ISD::BITCAST) {
20528     SDValue N0Src = peekThroughBitcasts(N0);
20529     SDValue N1Src = peekThroughBitcasts(N1);
20530     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
20531     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
20532     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
20533         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
20534       EVT NewVT;
20535       SDLoc DL(N);
20536       SDValue NewIdx;
20537       LLVMContext &Ctx = *DAG.getContext();
20538       unsigned NumElts = VT.getVectorNumElements();
20539       unsigned EltSizeInBits = VT.getScalarSizeInBits();
20540       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
20541         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
20542         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
20543         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
20544       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
20545         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
20546         if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
20547           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
20548           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
20549         }
20550       }
20551       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
20552         SDValue Res = DAG.getBitcast(NewVT, N0Src);
20553         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
20554         return DAG.getBitcast(VT, Res);
20555       }
20556     }
20557   }
20558 
20559   // Canonicalize insert_subvector dag nodes.
20560   // Example:
20561   // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
20562   // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0), Idx1 < Idx0
20563   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
20564       N1.getValueType() == N0.getOperand(1).getValueType()) {
20565     unsigned OtherIdx = N0.getConstantOperandVal(2);
20566     if (InsIdx < OtherIdx) {
20567       // Swap nodes.
20568       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
20569                                   N0.getOperand(0), N1, N2);
20570       AddToWorklist(NewOp.getNode());
20571       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
20572                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
20573     }
20574   }
20575 
20576   // If the input vector is a concatenation, and the insert replaces
20577   // one of the pieces, we can optimize into a single concat_vectors.
20578   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
20579       N0.getOperand(0).getValueType() == N1.getValueType()) {
20580     unsigned Factor = N1.getValueType().getVectorNumElements();
20581     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
20582     Ops[InsIdx / Factor] = N1;
20583     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20584   }
20585 
20586   // Simplify source operands based on insertion.
20587   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20588     return SDValue(N, 0);
20589 
20590   return SDValue();
20591 }
20592 
20593 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
20594   SDValue N0 = N->getOperand(0);
20595 
20596   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
20597   if (N0->getOpcode() == ISD::FP16_TO_FP)
20598     return N0->getOperand(0);
20599 
20600   return SDValue();
20601 }
20602 
20603 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
20604   SDValue N0 = N->getOperand(0);
20605 
20606   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
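        // FP16_TO_FP only reads the low 16 bits of its operand, so masking
        // those bits with 0xffff is a no-op and the AND can be dropped.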
20607   if (N0->getOpcode() == ISD::AND) {
20608     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
20609     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
20610       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
20611                          N0.getOperand(0));
20612     }
20613   }
20614 
20615   return SDValue();
20616 }
20617 
20618 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
20619   SDValue N0 = N->getOperand(0);
20620   EVT VT = N0.getValueType();
20621   unsigned Opcode = N->getOpcode();
20622 
20623   // VECREDUCE over a 1-element vector is just an extract.
20624   if (VT.getVectorNumElements() == 1) {
20625     SDLoc dl(N);
20626     SDValue Res =
20627         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
20628                     DAG.getVectorIdxConstant(0, dl));
20629     if (Res.getValueType() != N->getValueType(0))
20630       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
20631     return Res;
20632   }
20633 
20634   // On a boolean vector an and/or reduction is the same as a umin/umax
20635   // reduction. Convert them if the latter is legal while the former isn't.
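        // For illustration: with lanes known to be 0 or all-ones (checked via
        // ComputeNumSignBits below), e.g. <-1, 0, -1, -1>, the and-reduction
        // (0) matches the unsigned-min reduction and the or-reduction (-1)
        // matches the unsigned-max reduction.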
20636   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
20637     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
20638         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
20639     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
20640         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
20641         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
20642       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
20643   }
20644 
20645   return SDValue();
20646 }
20647 
20648 /// Returns a vector_shuffle if it is able to transform an AND to a
20649 /// vector_shuffle with the destination vector and a zero vector.
20650 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
20651 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
20652 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
20653   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
20654 
20655   EVT VT = N->getValueType(0);
20656   SDValue LHS = N->getOperand(0);
20657   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
20658   SDLoc DL(N);
20659 
20660   // Make sure we're not running after operation legalization where it
20661   // may have custom lowered the vector shuffles.
20662   if (LegalOperations)
20663     return SDValue();
20664 
20665   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
20666     return SDValue();
20667 
20668   EVT RVT = RHS.getValueType();
20669   unsigned NumElts = RHS.getNumOperands();
20670 
20671   // Attempt to create a valid clear mask by splitting the mask into
20672   // sub-elements and checking that each is either all zeros or all ones,
20673   // which makes it suitable for shuffle masking.
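        // For illustration (hypothetical values, little-endian): in
        // (and v2i64 X, <0x00000000FFFFFFFF, 0xFFFFFFFF00000000>) neither i64
        // element is all zeros or all ones, but at Split = 2 every i32
        // sub-element is, giving
        // shuffle(bitcast(X):v4i32, zero:v4i32, <0, 5, 6, 3>).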
20674   auto BuildClearMask = [&](int Split) {
20675     int NumSubElts = NumElts * Split;
20676     int NumSubBits = RVT.getScalarSizeInBits() / Split;
20677 
20678     SmallVector<int, 8> Indices;
20679     for (int i = 0; i != NumSubElts; ++i) {
20680       int EltIdx = i / Split;
20681       int SubIdx = i % Split;
20682       SDValue Elt = RHS.getOperand(EltIdx);
20683       // X & undef --> 0 (not undef). So this lane must be converted to choose
20684       // from the zero constant vector (same as if the element had all 0-bits).
20685       if (Elt.isUndef()) {
20686         Indices.push_back(i + NumSubElts);
20687         continue;
20688       }
20689 
20690       APInt Bits;
20691       if (isa<ConstantSDNode>(Elt))
20692         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
20693       else if (isa<ConstantFPSDNode>(Elt))
20694         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
20695       else
20696         return SDValue();
20697 
20698       // Extract the sub-element from the constant bit mask.
20699       if (DAG.getDataLayout().isBigEndian())
20700         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
20701       else
20702         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
20703 
20704       if (Bits.isAllOnesValue())
20705         Indices.push_back(i);
20706       else if (Bits == 0)
20707         Indices.push_back(i + NumSubElts);
20708       else
20709         return SDValue();
20710     }
20711 
20712     // Let's see if the target supports this vector_shuffle.
20713     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
20714     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
20715     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
20716       return SDValue();
20717 
20718     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
20719     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
20720                                                    DAG.getBitcast(ClearVT, LHS),
20721                                                    Zero, Indices));
20722   };
20723 
20724   // Determine maximum split level (byte level masking).
20725   int MaxSplit = 1;
20726   if (RVT.getScalarSizeInBits() % 8 == 0)
20727     MaxSplit = RVT.getScalarSizeInBits() / 8;
20728 
20729   for (int Split = 1; Split <= MaxSplit; ++Split)
20730     if (RVT.getScalarSizeInBits() % Split == 0)
20731       if (SDValue S = BuildClearMask(Split))
20732         return S;
20733 
20734   return SDValue();
20735 }
20736 
20737 /// If a vector binop is performed on splat values, it may be profitable to
20738 /// extract, scalarize, and insert/splat.
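      /// For illustration: (fadd (splat X, i), (splat Y, i)) can become
      /// splat (fadd X[i], Y[i]), i, trading one vector op for a scalar op
      /// plus a rebuilt splat.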
20739 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
20740   SDValue N0 = N->getOperand(0);
20741   SDValue N1 = N->getOperand(1);
20742   unsigned Opcode = N->getOpcode();
20743   EVT VT = N->getValueType(0);
20744   EVT EltVT = VT.getVectorElementType();
20745   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20746 
20747   // TODO: Remove/replace the extract cost check? If the elements are available
20748   //       as scalars, then there may be no extract cost. Should we ask if
20749   //       inserting a scalar back into a vector is cheap instead?
20750   int Index0, Index1;
20751   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
20752   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
20753   if (!Src0 || !Src1 || Index0 != Index1 ||
20754       Src0.getValueType().getVectorElementType() != EltVT ||
20755       Src1.getValueType().getVectorElementType() != EltVT ||
20756       !TLI.isExtractVecEltCheap(VT, Index0) ||
20757       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
20758     return SDValue();
20759 
20760   SDLoc DL(N);
20761   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
20762   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
20763   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
20764   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
20765 
20766   // If all lanes but 1 are undefined, no need to splat the scalar result.
20767   // TODO: Keep track of undefs and use that info in the general case.
20768   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
20769       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
20770       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
20771     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
20772     // build_vec ..undef, (bo X, Y), undef...
20773     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
20774     Ops[Index0] = ScalarBO;
20775     return DAG.getBuildVector(VT, DL, Ops);
20776   }
20777 
20778   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
20779   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
20780   return DAG.getBuildVector(VT, DL, Ops);
20781 }
20782 
20783 /// Visit a binary vector operation, like ADD.
20784 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
20785   assert(N->getValueType(0).isVector() &&
20786          "SimplifyVBinOp only works on vectors!");
20787 
20788   SDValue LHS = N->getOperand(0);
20789   SDValue RHS = N->getOperand(1);
20790   SDValue Ops[] = {LHS, RHS};
20791   EVT VT = N->getValueType(0);
20792   unsigned Opcode = N->getOpcode();
20793   SDNodeFlags Flags = N->getFlags();
20794 
20795   // See if we can constant fold the vector operation.
20796   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
20797           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
20798     return Fold;
20799 
20800   // Move unary shuffles with identical masks after a vector binop:
20801   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
20802   //   --> shuffle (VBinOp A, B), Undef, Mask
20803   // This does not require type legality checks because we are creating the
20804   // same types of operations that are in the original sequence. We do have to
20805   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
20806   // though. This code is adapted from the identical transform in instcombine.
20807   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
20808       Opcode != ISD::UREM && Opcode != ISD::SREM &&
20809       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
20810     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
20811     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
20812     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
20813         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
20814         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
20815       SDLoc DL(N);
20816       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
20817                                      RHS.getOperand(0), Flags);
20818       SDValue UndefV = LHS.getOperand(1);
20819       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
20820     }
20821 
20822     // Try to sink a splat shuffle after a binop with a uniform constant.
20823     // This is limited to cases where neither the shuffle nor the constant have
20824     // undefined elements because that could be poison-unsafe or inhibit
20825     // demanded elements analysis. It is further limited to not change a splat
20826     // of an inserted scalar because that may be optimized better by
20827     // load-folding or other target-specific behaviors.
20828     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
20829         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
20830         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
20831       // binop (splat X), (splat C) --> splat (binop X, C)
20832       SDLoc DL(N);
20833       SDValue X = Shuf0->getOperand(0);
20834       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
20835       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
20836                                   Shuf0->getMask());
20837     }
20838     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
20839         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
20840         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
20841       // binop (splat C), (splat X) --> splat (binop C, X)
20842       SDLoc DL(N);
20843       SDValue X = Shuf1->getOperand(0);
20844       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
20845       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
20846                                   Shuf1->getMask());
20847     }
20848   }
20849 
20850   // The following pattern is likely to emerge with vector reduction ops. Moving
20851   // the binary operation ahead of insertion may allow using a narrower vector
20852   // instruction that has better performance than the wide version of the op:
20853   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
20854   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
20855       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
20856       LHS.getOperand(2) == RHS.getOperand(2) &&
20857       (LHS.hasOneUse() || RHS.hasOneUse())) {
20858     SDValue X = LHS.getOperand(1);
20859     SDValue Y = RHS.getOperand(1);
20860     SDValue Z = LHS.getOperand(2);
20861     EVT NarrowVT = X.getValueType();
20862     if (NarrowVT == Y.getValueType() &&
20863         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20864       // (binop undef, undef) may not return undef, so compute that result.
20865       SDLoc DL(N);
20866       SDValue VecC =
20867           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
20868       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
20869       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
20870     }
20871   }
20872 
20873   // Make sure all but the first op are undef or constant.
20874   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
20875     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
20876            std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
20877                      [](const SDValue &Op) {
20878                        return Op.isUndef() ||
20879                               ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
20880                      });
20881   };
20882 
20883   // The following pattern is likely to emerge with vector reduction ops. Moving
20884   // the binary operation ahead of the concat may allow using a narrower vector
20885   // instruction that has better performance than the wide version of the op:
20886   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
20887   //   concat (VBinOp X, Y), VecC
20888   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
20889       (LHS.hasOneUse() || RHS.hasOneUse())) {
20890     EVT NarrowVT = LHS.getOperand(0).getValueType();
20891     if (NarrowVT == RHS.getOperand(0).getValueType() &&
20892         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20893       SDLoc DL(N);
20894       unsigned NumOperands = LHS.getNumOperands();
20895       SmallVector<SDValue, 4> ConcatOps;
20896       for (unsigned i = 0; i != NumOperands; ++i) {
20897         // This will constant fold for operands 1 and up.
20898         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
20899                                         RHS.getOperand(i)));
20900       }
20901 
20902       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20903     }
20904   }
20905 
20906   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
20907     return V;
20908 
20909   return SDValue();
20910 }
20911 
20912 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
20913                                     SDValue N2) {
20914   assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
20915 
20916   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
20917                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
20918 
20919   // If we got a simplified select_cc node back from SimplifySelectCC, then
20920   // break it down into a new SETCC node, and a new SELECT node, and then return
20921   // the SELECT node, since we were called with a SELECT node.
20922   if (SCC.getNode()) {
20923     // Check to see if we got a select_cc back (to turn into setcc/select).
20924     // Otherwise, just return whatever node we got back, like fabs.
20925     if (SCC.getOpcode() == ISD::SELECT_CC) {
20926       const SDNodeFlags Flags = N0.getNode()->getFlags();
20927       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
20928                                   N0.getValueType(),
20929                                   SCC.getOperand(0), SCC.getOperand(1),
20930                                   SCC.getOperand(4), Flags);
20931       AddToWorklist(SETCC.getNode());
20932       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
20933                                          SCC.getOperand(2), SCC.getOperand(3));
20934       SelectNode->setFlags(Flags);
20935       return SelectNode;
20936     }
20937 
20938     return SCC;
20939   }
20940   return SDValue();
20941 }
20942 
20943 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
20944 /// being selected between, see if we can simplify the select.  Callers of this
20945 /// should assume that TheSelect is deleted if this returns true.  As such, they
20946 /// should return the appropriate thing (e.g. the node) back to the top-level of
20947 /// the DAG combiner loop to avoid it being looked at.
20948 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
20949                                     SDValue RHS) {
20950   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) -> (fsqrt x)
20951   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
20952   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
20953     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
20954       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
20955       SDValue Sqrt = RHS;
20956       ISD::CondCode CC;
20957       SDValue CmpLHS;
20958       const ConstantFPSDNode *Zero = nullptr;
20959 
20960       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
20961         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
20962         CmpLHS = TheSelect->getOperand(0);
20963         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
20964       } else {
20965         // SELECT or VSELECT
20966         SDValue Cmp = TheSelect->getOperand(0);
20967         if (Cmp.getOpcode() == ISD::SETCC) {
20968           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
20969           CmpLHS = Cmp.getOperand(0);
20970           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
20971         }
20972       }
20973       if (Zero && Zero->isZero() &&
20974           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
20975           CC == ISD::SETULT || CC == ISD::SETLT)) {
20976         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
20977         CombineTo(TheSelect, Sqrt);
20978         return true;
20979       }
20980     }
20981   }
20982   // Cannot simplify select with vector condition
20983   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
20984 
20985   // If this is a select from two identical things, try to pull the operation
20986   // through the select.
20987   if (LHS.getOpcode() != RHS.getOpcode() ||
20988       !LHS.hasOneUse() || !RHS.hasOneUse())
20989     return false;
20990 
20991   // If this is a load and the token chain is identical, replace the select
20992   // of two loads with a load through a select of the address to load from.
20993   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
20994   // constants have been dropped into the constant pool.
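        // For illustration: select Cond, (load Addr1), (load Addr2), where
        // both loads share one chain, becomes load (select Cond, Addr1,
        // Addr2), subject to the safety checks below.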
20995   if (LHS.getOpcode() == ISD::LOAD) {
20996     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
20997     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
20998 
20999     // Token chains must be identical.
21000     if (LHS.getOperand(0) != RHS.getOperand(0) ||
21001         // Do not let this transformation reduce the number of volatile loads.
21002         // Be conservative for atomics for the moment
21003         // TODO: This does appear to be legal for unordered atomics (see D66309)
21004         !LLD->isSimple() || !RLD->isSimple() ||
21005         // FIXME: If either is a pre/post inc/dec load,
21006         // we'd need to split out the address adjustment.
21007         LLD->isIndexed() || RLD->isIndexed() ||
21008         // If this is an EXTLOAD, the VT's must match.
21009         LLD->getMemoryVT() != RLD->getMemoryVT() ||
21010         // If this is an EXTLOAD, the kind of extension must match.
21011         (LLD->getExtensionType() != RLD->getExtensionType() &&
21012          // The only exception is if one of the extensions is anyext.
21013          LLD->getExtensionType() != ISD::EXTLOAD &&
21014          RLD->getExtensionType() != ISD::EXTLOAD) ||
21015         // FIXME: this discards src value information.  This is
21016         // over-conservative. It would be beneficial to be able to remember
21017         // both potential memory locations.  Since we are discarding
21018         // src value info, don't do the transformation if the memory
21019         // locations are not in the default address space.
21020         LLD->getPointerInfo().getAddrSpace() != 0 ||
21021         RLD->getPointerInfo().getAddrSpace() != 0 ||
21022         // We can't produce a CMOV of a TargetFrameIndex since we won't
21023         // generate the address generation required.
21024         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21025         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21026         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
21027                                       LLD->getBasePtr().getValueType()))
21028       return false;
21029 
21030     // The loads must not depend on one another.
21031     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
21032       return false;
21033 
21034     // Check that the select condition doesn't reach either load.  If so,
21035     // folding this will induce a cycle into the DAG.  If not, this is safe to
21036     // xform, so create a select of the addresses.
21037 
21038     SmallPtrSet<const SDNode *, 32> Visited;
21039     SmallVector<const SDNode *, 16> Worklist;
21040 
21041     // Always fail if LLD and RLD are not independent. TheSelect is a
21042     // predecessor to all Nodes in question so we need not search past it.
21043 
21044     Visited.insert(TheSelect);
21045     Worklist.push_back(LLD);
21046     Worklist.push_back(RLD);
21047 
21048     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
21049         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
21050       return false;
21051 
21052     SDValue Addr;
21053     if (TheSelect->getOpcode() == ISD::SELECT) {
21054       // We cannot do this optimization if any pair of {RLD, LLD} is a
21055       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
21056       // Loads, we only need to check if CondNode is a successor to one of the
21057       // loads. We can further avoid this if there's no use of their chain
21058       // value.
21059       SDNode *CondNode = TheSelect->getOperand(0).getNode();
21060       Worklist.push_back(CondNode);
21061 
21062       if ((LLD->hasAnyUseOfValue(1) &&
21063            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21064           (RLD->hasAnyUseOfValue(1) &&
21065            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21066         return false;
21067 
21068       Addr = DAG.getSelect(SDLoc(TheSelect),
21069                            LLD->getBasePtr().getValueType(),
21070                            TheSelect->getOperand(0), LLD->getBasePtr(),
21071                            RLD->getBasePtr());
21072     } else {  // Otherwise SELECT_CC
21073       // We cannot do this optimization if any pair of {RLD, LLD} is a
21074       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
21075       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
21076       // one of the loads. We can further avoid this if there's no use of their
21077       // chain value.
21078 
21079       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
21080       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
21081       Worklist.push_back(CondLHS);
21082       Worklist.push_back(CondRHS);
21083 
21084       if ((LLD->hasAnyUseOfValue(1) &&
21085            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21086           (RLD->hasAnyUseOfValue(1) &&
21087            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21088         return false;
21089 
21090       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
21091                          LLD->getBasePtr().getValueType(),
21092                          TheSelect->getOperand(0),
21093                          TheSelect->getOperand(1),
21094                          LLD->getBasePtr(), RLD->getBasePtr(),
21095                          TheSelect->getOperand(4));
21096     }
21097 
21098     SDValue Load;
21099     // It is safe to replace the two loads if they have different alignments,
21100     // but the new load must be the minimum (most restrictive) alignment of the
21101     // inputs.
21102     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
21103     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
21104     if (!RLD->isInvariant())
21105       MMOFlags &= ~MachineMemOperand::MOInvariant;
21106     if (!RLD->isDereferenceable())
21107       MMOFlags &= ~MachineMemOperand::MODereferenceable;
21108     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
21109       // FIXME: Discards pointer and AA info.
21110       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
21111                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
21112                          MMOFlags);
21113     } else {
21114       // FIXME: Discards pointer and AA info.
21115       Load = DAG.getExtLoad(
21116           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
21117                                                   : LLD->getExtensionType(),
21118           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
21119           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
21120     }
21121 
21122     // Users of the select now use the result of the load.
21123     CombineTo(TheSelect, Load);
21124 
21125     // Users of the old loads now use the new load's chain.  We know the
21126     // old-load value is dead now.
21127     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
21128     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
21129     return true;
21130   }
21131 
21132   return false;
21133 }
21134 
21135 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
21136 /// bitwise 'and'.
21137 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
21138                                             SDValue N1, SDValue N2, SDValue N3,
21139                                             ISD::CondCode CC) {
21140   // If this is a select where the false operand is zero and the compare is a
21141   // check of the sign bit, see if we can perform the "gzip trick":
21142   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
21143   // select_cc setgt X, -1, A, 0 -> and (not (sra X, size(X)-1)), A
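        // For illustration (i32, hypothetical values): select_cc setlt X, 0,
        // A, 0 becomes and (sra X, 31), A. If A is the single-bit constant 8
        // (bit 3), the cheaper and (srl X, 28), 8 is used instead: the sign
        // bit lands on bit 3, so the AND yields 8 when X < 0 and 0 otherwise.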
21144   EVT XType = N0.getValueType();
21145   EVT AType = N2.getValueType();
21146   if (!isNullConstant(N3) || !XType.bitsGE(AType))
21147     return SDValue();
21148 
21149   // If the comparison is testing for a positive value, we have to invert
21150   // the sign bit mask, so only do that transform if the target has a bitwise
21151   // 'and not' instruction (the invert is free).
21152   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
21153     // (X > -1) ? A : 0
21154     // (X >  0) ? X : 0 <-- This is canonical signed max.
21155     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
21156       return SDValue();
21157   } else if (CC == ISD::SETLT) {
21158     // (X <  0) ? A : 0
21159     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
21160     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
21161       return SDValue();
21162   } else {
21163     return SDValue();
21164   }
21165 
21166   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
21167   // constant.
21168   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
21169   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21170   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
21171     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
21172     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
21173       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21174       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
21175       AddToWorklist(Shift.getNode());
21176 
21177       if (XType.bitsGT(AType)) {
21178         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21179         AddToWorklist(Shift.getNode());
21180       }
21181 
21182       if (CC == ISD::SETGT)
21183         Shift = DAG.getNOT(DL, Shift, AType);
21184 
21185       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21186     }
21187   }
21188 
21189   unsigned ShCt = XType.getSizeInBits() - 1;
21190   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
21191     return SDValue();
21192 
21193   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21194   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
21195   AddToWorklist(Shift.getNode());
21196 
21197   if (XType.bitsGT(AType)) {
21198     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21199     AddToWorklist(Shift.getNode());
21200   }
21201 
21202   if (CC == ISD::SETGT)
21203     Shift = DAG.getNOT(DL, Shift, AType);
21204 
21205   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21206 }
21207 
21208 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
21209 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
21210 /// in it. This may be a win when the constant is not otherwise available
21211 /// because it replaces two constant pool loads with one.
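      /// The constant array is laid out as { FV, TV }, so the selected offset
      /// is 0 for a false condition and the element size (e.g. 4 for f32) for
      /// a true one.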
21212 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
21213     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
21214     ISD::CondCode CC) {
21215   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
21216     return SDValue();
21217 
21218   // If we are before legalize types, we want the other legalization to happen
21219   // first (for example, to avoid messing with soft float).
21220   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
21221   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
21222   EVT VT = N2.getValueType();
21223   if (!TV || !FV || !TLI.isTypeLegal(VT))
21224     return SDValue();
21225 
21226   // If a constant can be materialized without loads, this does not make sense.
21227   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
21228       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
21229       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
21230     return SDValue();
21231 
21232   // If both constants have multiple uses, then we won't need to do an extra
21233   // load. The values are likely around in registers for other users.
21234   if (!TV->hasOneUse() && !FV->hasOneUse())
21235     return SDValue();
21236 
21237   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
21238                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
21239   Type *FPTy = Elts[0]->getType();
21240   const DataLayout &TD = DAG.getDataLayout();
21241 
21242   // Create a ConstantArray of the two constants.
21243   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
21244   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
21245                                       TD.getPrefTypeAlign(FPTy));
21246   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
21247 
21248   // Get offsets to the 0 and 1 elements of the array, so we can select between
21249   // them.
21250   SDValue Zero = DAG.getIntPtrConstant(0, DL);
21251   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
21252   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
21253   SDValue Cond =
21254       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
21255   AddToWorklist(Cond.getNode());
21256   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
21257   AddToWorklist(CstOffset.getNode());
21258   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
21259   AddToWorklist(CPIdx.getNode());
21260   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
21261                      MachinePointerInfo::getConstantPool(
21262                          DAG.getMachineFunction()), Alignment);
21263 }
21264 
21265 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
21266 /// where 'cond' is the comparison specified by CC.
21267 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
21268                                       SDValue N2, SDValue N3, ISD::CondCode CC,
21269                                       bool NotExtCompare) {
21270   // (x ? y : y) -> y.
21271   if (N2 == N3) return N2;
21272 
21273   EVT CmpOpVT = N0.getValueType();
21274   EVT CmpResVT = getSetCCResultType(CmpOpVT);
21275   EVT VT = N2.getValueType();
21276   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
21277   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21278   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
21279 
21280   // Determine if the condition we're dealing with is constant.
21281   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
21282     AddToWorklist(SCC.getNode());
21283     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
21284       // fold select_cc true, x, y -> x
21285       // fold select_cc false, x, y -> y
21286       return !(SCCC->isNullValue()) ? N2 : N3;
21287     }
21288   }
21289 
21290   if (SDValue V =
21291           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
21292     return V;
21293 
21294   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
21295     return V;
21296 
21297   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
21298   // where y has a single bit set.
21299   // A plaintext description would be: we can turn the SELECT_CC into an AND
21300   // when the condition can be materialized as an all-ones register.  Any
21301   // single bit-test can be materialized as an all-ones register with
21302   // shift-left and shift-right-arith.
21303   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
21304       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
21305     SDValue AndLHS = N0->getOperand(0);
21306     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21307     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
21308       // Shift the tested bit over the sign bit.
21309       const APInt &AndMask = ConstAndRHS->getAPIntValue();
21310       unsigned ShCt = AndMask.getBitWidth() - 1;
21311       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
21312         SDValue ShlAmt =
21313           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
21314                           getShiftAmountTy(AndLHS.getValueType()));
21315         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
21316 
21317         // Now arithmetic right shift it all the way over, so the result is
21318         // either all-ones, or zero.
21319         SDValue ShrAmt =
21320           DAG.getConstant(ShCt, SDLoc(Shl),
21321                           getShiftAmountTy(Shl.getValueType()));
21322         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
21323 
21324         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
21325       }
21326     }
21327   }
21328 
21329   // fold select C, 16, 0 -> shl C, 4
21330   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
21331   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
21332 
21333   if ((Fold || Swap) &&
21334       TLI.getBooleanContents(CmpOpVT) ==
21335           TargetLowering::ZeroOrOneBooleanContent &&
21336       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
21337 
21338     if (Swap) {
21339       CC = ISD::getSetCCInverse(CC, CmpOpVT);
21340       std::swap(N2C, N3C);
21341     }
21342 
21343     // If the caller doesn't want us to simplify this into a zext of a compare,
21344     // don't do it.
21345     if (NotExtCompare && N2C->isOne())
21346       return SDValue();
21347 
21348     SDValue Temp, SCC;
21349     // zext (setcc n0, n1)
21350     if (LegalTypes) {
21351       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
21352       if (VT.bitsLT(SCC.getValueType()))
21353         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
21354       else
21355         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
21356     } else {
21357       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
21358       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
21359     }
21360 
21361     AddToWorklist(SCC.getNode());
21362     AddToWorklist(Temp.getNode());
21363 
21364     if (N2C->isOne())
21365       return Temp;
21366 
21367     unsigned ShCt = N2C->getAPIntValue().logBase2();
21368     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
21369       return SDValue();
21370 
21371     // shl setcc result by log2 n2c
21372     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
21373                        DAG.getConstant(ShCt, SDLoc(Temp),
21374                                        getShiftAmountTy(Temp.getValueType())));
21375   }
21376 
21377   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
21378   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
21379   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
21380   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
21381   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
21382   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
21383   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
21384   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
21385   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
21386     SDValue ValueOnZero = N2;
21387     SDValue Count = N3;
21388     // If the condition is NE instead of E, swap the operands.
21389     if (CC == ISD::SETNE)
21390       std::swap(ValueOnZero, Count);
21391     // Check if the value on zero is a constant equal to the bits in the type.
21392     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
21393       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
21394         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
21395         // legal, combine to just cttz.
21396         if ((Count.getOpcode() == ISD::CTTZ ||
21397              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
21398             N0 == Count.getOperand(0) &&
21399             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
21400           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
21401         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
21402         // legal, combine to just ctlz.
21403         if ((Count.getOpcode() == ISD::CTLZ ||
21404              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
21405             N0 == Count.getOperand(0) &&
21406             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
21407           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
21408       }
21409     }
21410   }
21411 
21412   return SDValue();
21413 }
21414 
21415 /// This is a stub for TargetLowering::SimplifySetCC.
21416 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
21417                                    ISD::CondCode Cond, const SDLoc &DL,
21418                                    bool foldBooleans) {
21419   TargetLowering::DAGCombinerInfo
21420     DagCombineInfo(DAG, Level, false, this);
21421   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
21422 }
21423 
21424 /// Given an ISD::SDIV node expressing a divide by constant, return
21425 /// a DAG expression that will generate the same value by multiplying
21426 /// by a magic number.
21427 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
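      /// For illustration (the classic signed divide-by-7 expansion from
      /// Hacker's Delight, i32): q = mulhs(n, 0x92492493); q = q + n;
      /// q = sra(q, 2); q = q + srl(q, 31).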
21428 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
21429   // When optimizing for minimum size, we don't want to expand a div to a mul
21430   // and a shift.
21431   if (DAG.getMachineFunction().getFunction().hasMinSize())
21432     return SDValue();
21433 
21434   SmallVector<SDNode *, 8> Built;
21435   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
21436     for (SDNode *N : Built)
21437       AddToWorklist(N);
21438     return S;
21439   }
21440 
21441   return SDValue();
21442 }
21443 
21444 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
21445 /// DAG expression that will generate the same value by right shifting.
21446 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
21447   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
21448   if (!C)
21449     return SDValue();
21450 
21451   // Avoid division by zero.
21452   if (C->isNullValue())
21453     return SDValue();
21454 
21455   SmallVector<SDNode *, 8> Built;
21456   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
21457     for (SDNode *N : Built)
21458       AddToWorklist(N);
21459     return S;
21460   }
21461 
21462   return SDValue();
21463 }
21464 
21465 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
21466 /// expression that will generate the same value by multiplying by a magic
21467 /// number.
21468 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
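      /// For illustration (the classic unsigned divide-by-7 expansion from
      /// Hacker's Delight, i32): q = mulhu(n, 0x24924925);
      /// t = srl(n - q, 1) + q; result = srl(t, 2).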
21469 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
21470   // When optimizing for minimum size, we don't want to expand a div to a mul
21471   // and a shift.
21472   if (DAG.getMachineFunction().getFunction().hasMinSize())
21473     return SDValue();
21474 
21475   SmallVector<SDNode *, 8> Built;
21476   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
21477     for (SDNode *N : Built)
21478       AddToWorklist(N);
21479     return S;
21480   }
21481 
21482   return SDValue();
21483 }
21484 
21485 /// Determines the LogBase2 value for a non-null input value using the
21486 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
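      /// For illustration: with V = 16 in an i32 lane, ctlz(16) = 27 and
      /// LogBase2 = 31 - 27 = 4.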
21487 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
21488   EVT VT = V.getValueType();
21489   unsigned EltBits = VT.getScalarSizeInBits();
21490   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
21491   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
21492   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
21493   return LogBase2;
21494 }
21495 
21496 /// Newton iteration for a zero of F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
21497 /// For the reciprocal, we need to find the zero of the function:
21498 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
21499 ///     =>
21500 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
21501 ///     does not require additional intermediate precision]
21502 /// For the last iteration, put numerator N into it to gain more precision:
21503 ///   Result = N X_i + X_i (N - N A X_i)
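      /// For illustration (hypothetical values): with A = 3.0 and an initial
      /// estimate X_0 = 0.333, one step gives
      ///   X_1 = 0.333 * (2 - 3.0 * 0.333) = 0.333 * 1.001 = 0.333333,
      /// so the error shrinks roughly quadratically per iteration.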
21504 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
21505                                       SDNodeFlags Flags) {
21506   if (LegalDAG)
21507     return SDValue();
21508 
21509   // TODO: Handle half and/or extended types?
21510   EVT VT = Op.getValueType();
21511   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
21512     return SDValue();
21513 
21514   // If estimates are explicitly disabled for this function, we're done.
21515   MachineFunction &MF = DAG.getMachineFunction();
21516   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
21517   if (Enabled == TLI.ReciprocalEstimate::Disabled)
21518     return SDValue();
21519 
21520   // Estimates may be explicitly enabled for this type with a custom number of
21521   // refinement steps.
21522   int Iterations = TLI.getDivRefinementSteps(VT, MF);
21523   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
21524     AddToWorklist(Est.getNode());
21525 
21526     SDLoc DL(Op);
21527     if (Iterations) {
21528       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
21529 
21530       // Newton iterations: Est = Est + Est (N - Arg * Est)
21531       // If this is the last iteration, also multiply by the numerator.
21532       for (int i = 0; i < Iterations; ++i) {
21533         SDValue MulEst = Est;
21534 
21535         if (i == Iterations - 1) {
21536           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
21537           AddToWorklist(MulEst.getNode());
21538         }
21539 
21540         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
21541         AddToWorklist(NewEst.getNode());
21542 
21543         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
21544                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
21545         AddToWorklist(NewEst.getNode());
21546 
21547         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21548         AddToWorklist(NewEst.getNode());
21549 
21550         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
21551         AddToWorklist(Est.getNode());
21552       }
21553     } else {
21554       // If no iterations are available, multiply with N.
21555       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
21556       AddToWorklist(Est.getNode());
21557     }
21558 
21559     return Est;
21560   }
21561 
21562   return SDValue();
21563 }
21564 
21565 /// Newton iteration for a zero of F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
21566 /// For the reciprocal sqrt, we need to find the zero of the function:
21567 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21568 ///     =>
21569 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
21570 /// As a result, we precompute A/2 prior to the iteration loop.
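      /// For illustration (hypothetical values): with A = 4.0 and X_0 = 0.51,
      /// one step gives X_1 = 0.51 * (1.5 - 2.0 * 0.51^2) ~= 0.4997,
      /// converging toward 1/sqrt(4.0) = 0.5.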
21571 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
21572                                          unsigned Iterations,
21573                                          SDNodeFlags Flags, bool Reciprocal) {
21574   EVT VT = Arg.getValueType();
21575   SDLoc DL(Arg);
21576   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
21577 
21578   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
21579   // this entire sequence requires only one FP constant.
21580   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
21581   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
21582 
21583   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
21584   for (unsigned i = 0; i < Iterations; ++i) {
21585     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
21586     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
21587     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
21588     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21589   }
21590 
21591   // If a non-reciprocal square root was requested, multiply the result by Arg.
21592   if (!Reciprocal)
21593     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
21594 
21595   return Est;
21596 }
21597 
21598 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21599 /// For the reciprocal sqrt, we need to find the zero of the function:
21600 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21601 ///     =>
21602 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
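/// This is algebraically the same update as the one-constant form above,
/// rewritten so that it needs the two constants -0.5 and -3.0 instead of 1.5
/// and a precomputed A/2:
///   X_i (1.5 - A X_i^2 / 2) = (-0.5 * X_i) * (A * X_i * X_i - 3.0)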
21603 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
21604                                          unsigned Iterations,
21605                                          SDNodeFlags Flags, bool Reciprocal) {
21606   EVT VT = Arg.getValueType();
21607   SDLoc DL(Arg);
21608   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
21609   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
21610 
21611   // This routine must enter the loop below to work correctly
21612   // when (Reciprocal == false).
21613   assert(Iterations > 0 && "This routine requires at least one iteration");
21614 
21615   // Newton iterations for reciprocal square root:
21616   // E = (E * -0.5) * ((A * E) * E + -3.0)
21617   for (unsigned i = 0; i < Iterations; ++i) {
21618     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
21619     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
21620     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
21621 
21622     // When calculating a square root at the last iteration build:
21623     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
21624     // (notice a common subexpression)
21625     SDValue LHS;
21626     if (Reciprocal || (i + 1) < Iterations) {
21627       // RSQRT: LHS = (E * -0.5)
21628       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
21629     } else {
21630       // SQRT: LHS = (A * E) * -0.5
21631       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
21632     }
21633 
21634     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
21635   }
21636 
21637   return Est;
21638 }
21639 
21640 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
21641 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
21642 /// Op can be zero.
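/// (Under IEEE arithmetic, Op * rsqrt(Op) evaluates to 0.0 * infinity == NaN
/// when Op == 0.0, which is why zero and denormal inputs are fixed up with a
/// select below.)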
21643 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
21644                                            bool Reciprocal) {
21645   if (LegalDAG)
21646     return SDValue();
21647 
21648   // TODO: Handle half and/or extended types?
21649   EVT VT = Op.getValueType();
21650   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
21651     return SDValue();
21652 
21653   // If estimates are explicitly disabled for this function, we're done.
21654   MachineFunction &MF = DAG.getMachineFunction();
21655   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
21656   if (Enabled == TLI.ReciprocalEstimate::Disabled)
21657     return SDValue();
21658 
21659   // Estimates may be explicitly enabled for this type with a custom number of
21660   // refinement steps.
21661   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
21662 
21663   bool UseOneConstNR = false;
21664   if (SDValue Est =
21665       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
21666                           Reciprocal)) {
21667     AddToWorklist(Est.getNode());
21668 
21669     if (Iterations) {
21670       Est = UseOneConstNR
21671             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
21672             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
21673 
21674       if (!Reciprocal) {
21675         // The estimate is now completely wrong if the input was exactly 0.0 or
21676         // possibly a denormal. Force the answer to 0.0 for those cases.
21677         SDLoc DL(Op);
21678         EVT CCVT = getSetCCResultType(VT);
21679         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
21680         DenormalMode DenormMode = DAG.getDenormalMode(VT);
21681         if (DenormMode.Input == DenormalMode::IEEE) {
21682           // This is specifically a check for the handling of denormal inputs,
21683           // not the result.
21684 
21685           // fabs(X) < SmallestNormal ? 0.0 : Est
21686           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
21687           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
21688           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
21689           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
21690           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
21691           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
21692           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
21693         } else {
21694           // X == 0.0 ? 0.0 : Est
21695           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
21696           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
21697           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
21698         }
21699       }
21700     }
21701     return Est;
21702   }
21703 
21704   return SDValue();
21705 }
21706 
21707 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21708   return buildSqrtEstimateImpl(Op, Flags, true);
21709 }
21710 
21711 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21712   return buildSqrtEstimateImpl(Op, Flags, false);
21713 }
21714 
21715 /// Return true if there is any possibility that the two addresses overlap.
21716 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
21717 
21718   struct MemUseCharacteristics {
21719     bool IsVolatile;
21720     bool IsAtomic;
21721     SDValue BasePtr;
21722     int64_t Offset;
21723     Optional<int64_t> NumBytes;
21724     MachineMemOperand *MMO;
21725   };
21726 
21727   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
21728     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
21729       int64_t Offset = 0;
21730       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset())) {
21731         if (LSN->getAddressingMode() == ISD::PRE_INC)
21732           Offset = C->getSExtValue();
21733         else if (LSN->getAddressingMode() == ISD::PRE_DEC)
21734           Offset = -C->getSExtValue();
21735       }
21736       uint64_t Size =
21737           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
21738       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
21739               Offset /*base offset*/,
21740               Optional<int64_t>(Size),
21741               LSN->getMemOperand()};
21742     }
21743     if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
21744       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
21745               (LN->hasOffset()) ? LN->getOffset() : 0,
21746               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
21747                                 : Optional<int64_t>(),
21748               (MachineMemOperand *)nullptr};
21749     // Default.
21750     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
21751             (int64_t)0 /*offset*/,
21752             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
21753   };
21754 
21755   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
21756                         MUC1 = getCharacteristics(Op1);
21757 
21758   // If they are to the same address, then they must be aliases.
21759   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
21760       MUC0.Offset == MUC1.Offset)
21761     return true;
21762 
21763   // If they are both volatile then they cannot be reordered.
21764   if (MUC0.IsVolatile && MUC1.IsVolatile)
21765     return true;
21766 
21767   // Be conservative about atomics for the moment.
21768   // TODO: This is way overconservative for unordered atomics (see D66309)
21769   if (MUC0.IsAtomic && MUC1.IsAtomic)
21770     return true;
21771 
21772   if (MUC0.MMO && MUC1.MMO) {
21773     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21774         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21775       return false;
21776   }
21777 
21778   // Try to prove that there is aliasing, or that there is no aliasing. Either
21779   // way, we can return now. If nothing can be proved, proceed with more tests.
21780   bool IsAlias;
21781   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
21782                                        DAG, IsAlias))
21783     return IsAlias;
21784 
21785   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
21786   // either is not known.
21787   if (!MUC0.MMO || !MUC1.MMO)
21788     return true;
21789 
21790   // If one operation reads from invariant memory, and the other may store, they
21791   // cannot alias. These should really be checking the equivalent of mayWrite,
21792   // but it only matters for memory nodes other than load/store.
21793   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21794       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21795     return false;
21796 
21797   // If the required SrcValue1 and SrcValue2 are known to have relatively
21798   // large alignment compared to the size and offset of the access, we may be
21799   // able to prove they do not alias. This check is conservative for now to
21800   // catch cases created by splitting vector types: it only works when the
21801   // offsets are multiples of the size of the data.
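  // For illustration (numbers chosen arbitrarily): two 4-byte accesses from a
  // base aligned to 8 bytes, at source-value offsets 0 and 4, give
  // OffAlign0 == 0 and OffAlign1 == 4; since 0 + 4 <= 4, the byte ranges
  // cannot overlap and we can return "no alias" without consulting AA.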
21802   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
21803   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
21804   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
21805   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
21806   auto &Size0 = MUC0.NumBytes;
21807   auto &Size1 = MUC1.NumBytes;
21808   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
21809       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
21810       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
21811       SrcValOffset1 % *Size1 == 0) {
21812     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
21813     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
21814 
21815     // There is no overlap between these relatively aligned accesses of
21816     // similar size. Return no alias.
21817     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
21818       return false;
21819   }
21820 
21821   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
21822                    ? CombinerGlobalAA
21823                    : DAG.getSubtarget().useAA();
21824 #ifndef NDEBUG
21825   if (CombinerAAOnlyFunc.getNumOccurrences() &&
21826       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
21827     UseAA = false;
21828 #endif
21829 
21830   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
21831       Size0.hasValue() && Size1.hasValue()) {
21832     // Use alias analysis information.
21833     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
21834     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
21835     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
21836     AliasResult AAResult = AA->alias(
21837         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
21838                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
21839         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
21840                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
21841     if (AAResult == NoAlias)
21842       return false;
21843   }
21844 
21845   // Otherwise we have to assume they alias.
21846   return true;
21847 }
21848 
21849 /// Walk up chain skipping non-aliasing memory nodes,
21850 /// looking for aliasing nodes and adding them to the Aliases vector.
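/// For example, if N is a simple load whose chain passes through a store to a
/// provably disjoint address, that store is skipped and the walk continues
/// from the store's input chain, so only chain nodes that may actually alias
/// N end up in Aliases.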
21851 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
21852                                    SmallVectorImpl<SDValue> &Aliases) {
21853   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
21854   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
21855 
21856   // Get alias information for node.
21857   // TODO: relax aliasing for unordered atomics (see D66309)
21858   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
21859 
21860   // Starting off.
21861   Chains.push_back(OriginalChain);
21862   unsigned Depth = 0;
21863 
21864   // Attempt to improve the chain by a single step.
21865   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
21866     switch (C.getOpcode()) {
21867     case ISD::EntryToken:
21868       // No need to mark EntryToken.
21869       C = SDValue();
21870       return true;
21871     case ISD::LOAD:
21872     case ISD::STORE: {
21873       // Get alias information for C.
21874       // TODO: Relax aliasing for unordered atomics (see D66309)
21875       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
21876                       cast<LSBaseSDNode>(C.getNode())->isSimple();
21877       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
21878         // Look further up the chain.
21879         C = C.getOperand(0);
21880         return true;
21881       }
21882       // Alias, so stop here.
21883       return false;
21884     }
21885 
21886     case ISD::CopyFromReg:
21887       // Always forward past CopyFromReg.
21888       C = C.getOperand(0);
21889       return true;
21890 
21891     case ISD::LIFETIME_START:
21892     case ISD::LIFETIME_END: {
21893       // We can forward past any lifetime start/end that can be proven not to
21894       // alias the memory access.
21895       if (!isAlias(N, C.getNode())) {
21896         // Look further up the chain.
21897         C = C.getOperand(0);
21898         return true;
21899       }
21900       return false;
21901     }
21902     default:
21903       return false;
21904     }
21905   };
21906 
21907   // Look at each chain and determine if it is an alias.  If so, add it to the
21908   // aliases list.  If not, then continue up the chain looking for the next
21909   // candidate.
21910   while (!Chains.empty()) {
21911     SDValue Chain = Chains.pop_back_val();
21912 
21913     // Don't bother if we've seen Chain before.
21914     if (!Visited.insert(Chain.getNode()).second)
21915       continue;
21916 
21917     // For TokenFactor nodes, look at each operand and only continue up the
21918     // chain until we reach the depth limit.
21919     //
21920     // FIXME: The depth check could be made to return the last non-aliasing
21921     // chain we found before we hit a tokenfactor rather than the original
21922     // chain.
21923     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
21924       Aliases.clear();
21925       Aliases.push_back(OriginalChain);
21926       return;
21927     }
21928 
21929     if (Chain.getOpcode() == ISD::TokenFactor) {
21930       // We have to check each of the operands of the token factor for "small"
21931       // token factors, so we queue them up.  Pushing the operands onto the
21932       // stack in reverse order means they are popped in the original order,
21933       // increasing the likelihood that getNode finds a matching token factor (CSE).
21934       if (Chain.getNumOperands() > 16) {
21935         Aliases.push_back(Chain);
21936         continue;
21937       }
21938       for (unsigned n = Chain.getNumOperands(); n;)
21939         Chains.push_back(Chain.getOperand(--n));
21940       ++Depth;
21941       continue;
21942     }
21943     // Everything else
21944     if (ImproveChain(Chain)) {
21945       // Updated chain found; consider the new chain if one exists.
21946       if (Chain.getNode())
21947         Chains.push_back(Chain);
21948       ++Depth;
21949       continue;
21950     }
21951     // No improved chain is possible, so treat this chain as an alias.
21952     Aliases.push_back(Chain);
21953   }
21954 }
21955 
21956 /// Walk up the chain, skipping non-aliasing memory nodes, looking for a better
21957 /// chain (the first aliasing node).
21958 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
21959   if (OptLevel == CodeGenOpt::None)
21960     return OldChain;
21961 
21962   // Ops for replacing token factor.
21963   SmallVector<SDValue, 8> Aliases;
21964 
21965   // Accumulate all the aliases to this node.
21966   GatherAllAliases(N, OldChain, Aliases);
21967 
21968   // If no operands then chain to entry token.
21969   if (Aliases.size() == 0)
21970     return DAG.getEntryNode();
21971 
21972   // If a single operand then chain to it.  We don't need to revisit it.
21973   if (Aliases.size() == 1)
21974     return Aliases[0];
21975 
21976   // Construct a custom tailored token factor.
21977   return DAG.getTokenFactor(SDLoc(N), Aliases);
21978 }
21979 
21980 namespace {
21981 // TODO: Replace with std::monostate when we move to C++17.
21982 struct UnitT { } Unit;
21983 bool operator==(const UnitT &, const UnitT &) { return true; }
21984 bool operator!=(const UnitT &, const UnitT &) { return false; }
21985 } // namespace
21986 
21987 // This function tries to collect a bunch of potentially interesting
21988 // nodes to improve the chains of, all at once. This might seem
21989 // redundant, as this function gets called when visiting every store
21990 // node, so why not let the work be done on each store as it's visited?
21991 //
21992 // I believe this is mainly important because mergeConsecutiveStores
21993 // is unable to deal with merging stores of different sizes, so unless
21994 // we improve the chains of all the potential candidates up-front
21995 // before running mergeConsecutiveStores, it might only see some of
21996 // the nodes that will eventually be candidates, and then not be able
21997 // to go from a partially-merged state to the desired final
21998 // fully-merged state.
21999 
22000 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
22001   SmallVector<StoreSDNode *, 8> ChainedStores;
22002   StoreSDNode *STChain = St;
22003   // Intervals records which offsets from BaseIndex have been covered. In
22004   // the common case, every store writes to the address range immediately
22005   // preceding the last one, and is thus merged with that interval on insertion.
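  // For example (illustrative offsets): if St stores 4 bytes at offset 0 and
  // the next store up the chain stores 4 bytes at offset -4, the intervals
  // [0,4) and [-4,0) coalesce into [-4,4), since IntervalMap merges adjacent
  // intervals that map to equal values (here always Unit).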
22006 
22007   using IMap =
22008       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
22009   IMap::Allocator A;
22010   IMap Intervals(A);
22011 
22012   // This holds the base pointer, index, and the offset in bytes from the base
22013   // pointer.
22014   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22015 
22016   // We must have a base and an offset.
22017   if (!BasePtr.getBase().getNode())
22018     return false;
22019 
22020   // Do not handle stores to undef base pointers.
22021   if (BasePtr.getBase().isUndef())
22022     return false;
22023 
22024   // BaseIndexOffset assumes that offsets are fixed-size, which
22025   // is not valid for scalable vectors where the offsets are
22026   // scaled by `vscale`, so bail out early.
22027   if (St->getMemoryVT().isScalableVector())
22028     return false;
22029 
22030   // Add ST's interval.
22031   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
22032 
22033   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
22034     // If the chain has more than one use, then we can't reorder the mem ops.
22035     if (!SDValue(Chain, 0)->hasOneUse())
22036       break;
22037     // TODO: Relax for unordered atomics (see D66309)
22038     if (!Chain->isSimple() || Chain->isIndexed())
22039       break;
22040 
22041     // Find the base pointer and offset for this memory node.
22042     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
22043     // Check that the base pointer is the same as the original one.
22044     int64_t Offset;
22045     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
22046       break;
22047     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
22048     // Make sure we don't overlap with other intervals by checking the ones to
22049     // the left or right before inserting.
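    // For example (illustrative): if Intervals holds [0,4) and this store has
    // Offset == -4 and Length == 4, find(-4) returns [0,4); its start (0) is
    // not less than Offset + Length (0) and there is no preceding interval,
    // so [-4,0) is inserted (and coalesces with [0,4)).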
22050     auto I = Intervals.find(Offset);
22051     // If there's a next interval, we should end before it.
22052     if (I != Intervals.end() && I.start() < (Offset + Length))
22053       break;
22054     // If there's a previous interval, we should start after it.
22055     if (I != Intervals.begin() && (--I).stop() <= Offset)
22056       break;
22057     Intervals.insert(Offset, Offset + Length, Unit);
22058 
22059     ChainedStores.push_back(Chain);
22060     STChain = Chain;
22061   }
22062 
22063   // If we didn't find a chained store, exit.
22064   if (ChainedStores.size() == 0)
22065     return false;
22066 
22067   // Improve all chained stores (St and ChainedStores members) starting from
22068   // where the store chain ended and return a single TokenFactor.
22069   SDValue NewChain = STChain->getChain();
22070   SmallVector<SDValue, 8> TFOps;
22071   for (unsigned I = ChainedStores.size(); I;) {
22072     StoreSDNode *S = ChainedStores[--I];
22073     SDValue BetterChain = FindBetterChain(S, NewChain);
22074     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
22075         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
22076     TFOps.push_back(SDValue(S, 0));
22077     ChainedStores[I] = S;
22078   }
22079 
22080   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
22081   SDValue BetterChain = FindBetterChain(St, NewChain);
22082   SDValue NewST;
22083   if (St->isTruncatingStore())
22084     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
22085                               St->getBasePtr(), St->getMemoryVT(),
22086                               St->getMemOperand());
22087   else
22088     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
22089                          St->getBasePtr(), St->getMemOperand());
22090 
22091   TFOps.push_back(NewST);
22092 
22093   // If we improved every element of TFOps, then we've lost the dependence on
22094   // NewChain to successors of St and we need to add it back to TFOps. Do so at
22095   // the beginning to keep relative order consistent with FindBetterChain.
22096   auto hasImprovedChain = [&](SDValue ST) -> bool {
22097     return ST->getOperand(0) != NewChain;
22098   };
22099   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
22100   if (AddNewChain)
22101     TFOps.insert(TFOps.begin(), NewChain);
22102 
22103   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
22104   CombineTo(St, TF);
22105 
22106   // Add TF and its operands to the worklist.
22107   AddToWorklist(TF.getNode());
22108   for (const SDValue &Op : TF->ops())
22109     AddToWorklist(Op.getNode());
22110   AddToWorklist(STChain);
22111   return true;
22112 }
22113 
22114 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
22115   if (OptLevel == CodeGenOpt::None)
22116     return false;
22117 
22118   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22119 
22120   // We must have a base and an offset.
22121   if (!BasePtr.getBase().getNode())
22122     return false;
22123 
22124   // Do not handle stores to undef base pointers.
22125   if (BasePtr.getBase().isUndef())
22126     return false;
22127 
22128   // Directly improve a chain of disjoint stores starting at St.
22129   if (parallelizeChainedStores(St))
22130     return true;
22131 
22132   // Improve St's chain.
22133   SDValue BetterChain = FindBetterChain(St, St->getChain());
22134   if (St->getChain() != BetterChain) {
22135     replaceStoreChain(St, BetterChain);
22136     return true;
22137   }
22138   return false;
22139 }
22140 
22141 /// This is the entry point for the file.
22142 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
22143                            CodeGenOpt::Level OptLevel) {
22144   // This is the main entry point to this class.
22145   DAGCombiner(*this, AA, OptLevel).Run(Level);
22146 }
22147