xref: /llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (revision 9fb4bc5bf4ecdc8f53bd1b8eeea20390fb6e642e)
1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Analysis/VectorUtils.h"
16 #include "llvm/CodeGen/CallingConvLower.h"
17 #include "llvm/CodeGen/CodeGenCommonISel.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineJumpTableInfo.h"
21 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/TargetRegisterInfo.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/Support/DivisionByConstantInfo.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include <cctype>
37 using namespace llvm;
38 
/// Construct the target lowering object. All real state lives in the
/// TargetLoweringBase subobject; this class only adds behavior.
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42 
/// Return a human-readable name for a target-specific SDNode opcode, used in
/// DAG dumps. The base implementation knows no target opcodes and always
/// returns nullptr; targets are expected to override this.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46 
47 bool TargetLowering::isPositionIndependent() const {
48   return getTargetMachine().isPositionIndependent();
49 }
50 
51 /// Check whether a given call node is in tail position within its function. If
52 /// so, it sets Chain to the input chain of the tail call.
53 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54                                           SDValue &Chain) const {
55   const Function &F = DAG.getMachineFunction().getFunction();
56 
57   // First, check if tail calls have been disabled in this function.
58   if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59     return false;
60 
61   // Conservatively require the attributes of the call to match those of
62   // the return. Ignore following attributes because they don't affect the
63   // call sequence.
64   AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65   for (const auto &Attr :
66        {Attribute::Alignment, Attribute::Dereferenceable,
67         Attribute::DereferenceableOrNull, Attribute::NoAlias,
68         Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
69     CallerAttrs.removeAttribute(Attr);
70 
71   if (CallerAttrs.hasAttributes())
72     return false;
73 
74   // It's not safe to eliminate the sign / zero extension of the return value.
75   if (CallerAttrs.contains(Attribute::ZExt) ||
76       CallerAttrs.contains(Attribute::SExt))
77     return false;
78 
79   // Check if the only use is a function return node.
80   return isUsedByReturnOnly(Node, Chain);
81 }
82 
83 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
84     const uint32_t *CallerPreservedMask,
85     const SmallVectorImpl<CCValAssign> &ArgLocs,
86     const SmallVectorImpl<SDValue> &OutVals) const {
87   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
88     const CCValAssign &ArgLoc = ArgLocs[I];
89     if (!ArgLoc.isRegLoc())
90       continue;
91     MCRegister Reg = ArgLoc.getLocReg();
92     // Only look at callee saved registers.
93     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94       continue;
95     // Check that we pass the value used for the caller.
96     // (We look for a CopyFromReg reading a virtual register that is used
97     //  for the function live-in value of register Reg)
98     SDValue Value = OutVals[I];
99     if (Value->getOpcode() == ISD::AssertZext)
100       Value = Value.getOperand(0);
101     if (Value->getOpcode() != ISD::CopyFromReg)
102       return false;
103     Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105       return false;
106   }
107   return true;
108 }
109 
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror each relevant parameter attribute of argument ArgIdx into the
  // corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // At most one of the pointee-type-carrying ABI attributes may be present on
  // a single argument.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // A byval argument with no explicit stack alignment falls back to the
    // parameter's own alignment, if any.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
143 
144 /// Generate a libcall taking the given operands as arguments and returning a
145 /// result of type RetVT.
146 std::pair<SDValue, SDValue>
147 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
148                             ArrayRef<SDValue> Ops,
149                             MakeLibCallOptions CallOptions,
150                             const SDLoc &dl,
151                             SDValue InChain) const {
152   if (!InChain)
153     InChain = DAG.getEntryNode();
154 
155   TargetLowering::ArgListTy Args;
156   Args.reserve(Ops.size());
157 
158   TargetLowering::ArgListEntry Entry;
159   for (unsigned i = 0; i < Ops.size(); ++i) {
160     SDValue NewOp = Ops[i];
161     Entry.Node = NewOp;
162     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
163     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
164                                                  CallOptions.IsSExt);
165     Entry.IsZExt = !Entry.IsSExt;
166 
167     if (CallOptions.IsSoften &&
168         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
169       Entry.IsSExt = Entry.IsZExt = false;
170     }
171     Args.push_back(Entry);
172   }
173 
174   if (LC == RTLIB::UNKNOWN_LIBCALL)
175     report_fatal_error("Unsupported library call operation!");
176   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
177                                          getPointerTy(DAG.getDataLayout()));
178 
179   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
180   TargetLowering::CallLoweringInfo CLI(DAG);
181   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
182   bool zeroExtend = !signExtend;
183 
184   if (CallOptions.IsSoften &&
185       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
186     signExtend = zeroExtend = false;
187   }
188 
189   CLI.setDebugLoc(dl)
190       .setChain(InChain)
191       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
192       .setNoReturn(CallOptions.DoesNotReturn)
193       .setDiscardResult(!CallOptions.IsReturnValueUsed)
194       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
195       .setSExtResult(signExtend)
196       .setZExtResult(zeroExtend);
197   return LowerCallTo(CLI);
198 }
199 
/// Choose the sequence of value types to use when lowering the memory
/// operation described by Op, appending one EVT per load/store to MemOps.
/// Returns false if more than Limit operations would be needed, or if a
/// memcpy with a fixed destination alignment has a smaller source alignment.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type; MVT::Other means "no preference".
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily emit the largest usable type until Size bytes are covered,
  // shrinking VT whenever it no longer fits the remaining tail.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the integer types until one is safe (or we hit
        // i8, which is always acceptable as the floor).
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
289 
290 /// Soften the operands of a comparison. This code is shared among BR_CC,
291 /// SELECT_CC, and SETCC handlers.
292 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
293                                          SDValue &NewLHS, SDValue &NewRHS,
294                                          ISD::CondCode &CCCode,
295                                          const SDLoc &dl, const SDValue OldLHS,
296                                          const SDValue OldRHS) const {
297   SDValue Chain;
298   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
299                              OldRHS, Chain);
300 }
301 
/// Soften a floating-point comparison by expanding it into one or (for
/// SETONE/SETUEQ) two soft-float comparison libcalls, rewriting NewLHS,
/// NewRHS and CCCode into an equivalent integer comparison. Chain is threaded
/// through the libcall(s) and updated. NOTE(review): IsSignaling is not
/// referenced in this implementation.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2 stays UNKNOWN unless the
  // predicate needs two calls combined.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ, so two libcalls are combined below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  // Record the pre-softening operand types so argument-extension decisions
  // are based on the original FP types.
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  // The libcall result is compared against zero with the condition the
  // runtime defines for LC1 (possibly inverted).
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-libcall predicates: evaluate both comparisons and combine them with
    // AND (inverted case, SETONE) or OR (SETUEQ).
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // Merge the chains of both calls when the caller asked for chaining.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // A null NewRHS signals the caller that NewLHS is already a boolean.
    NewRHS = SDValue();
  }
}
441 
442 /// Return the entry encoding for a jump table in the current function. The
443 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
444 unsigned TargetLowering::getJumpTableEncoding() const {
445   // In non-pic modes, just use the address of a block.
446   if (!isPositionIndependent())
447     return MachineJumpTableInfo::EK_BlockAddress;
448 
449   // In PIC mode, if the target supports a GPRel32 directive, use it.
450   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
451     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
452 
453   // Otherwise, use a label difference.
454   return MachineJumpTableInfo::EK_LabelDifference32;
455 }
456 
457 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
458                                                  SelectionDAG &DAG) const {
459   // If our PIC model is GP relative, use the global offset table as the base.
460   unsigned JTEncoding = getJumpTableEncoding();
461 
462   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
463       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
464     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
465 
466   return Table;
467 }
468 
469 /// This returns the relocation base for the given PIC jumptable, the same as
470 /// getPICJumpTableRelocBase, but as an MCExpr.
471 const MCExpr *
472 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
473                                              unsigned JTI,MCContext &Ctx) const{
474   // The normal PIC reloc base is the label at the start of the jump table.
475   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
476 }
477 
478 SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
479                                                SDValue Addr, int JTI,
480                                                SelectionDAG &DAG) const {
481   SDValue Chain = Value;
482   // Jump table debug info is only needed if CodeView is enabled.
483   if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
484     Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
485   }
486   return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
487 }
488 
489 bool
490 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
491   const TargetMachine &TM = getTargetMachine();
492   const GlobalValue *GV = GA->getGlobal();
493 
494   // If the address is not even local to this DSO we will have to load it from
495   // a got and then add the offset.
496   if (!TM.shouldAssumeDSOLocal(GV))
497     return false;
498 
499   // If the code is position independent we will have to add a base register.
500   if (isPositionIndependent())
501     return false;
502 
503   // Otherwise we can do it.
504   return true;
505 }
506 
507 //===----------------------------------------------------------------------===//
508 //  Optimization Methods
509 //===----------------------------------------------------------------------===//
510 
511 /// If the specified instruction has a constant integer operand and there are
512 /// bits set in that constant that are not demanded, then clear those bits and
513 /// return true.
514 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
515                                             const APInt &DemandedBits,
516                                             const APInt &DemandedElts,
517                                             TargetLoweringOpt &TLO) const {
518   SDLoc DL(Op);
519   unsigned Opcode = Op.getOpcode();
520 
521   // Early-out if we've ended up calling an undemanded node, leave this to
522   // constant folding.
523   if (DemandedBits.isZero() || DemandedElts.isZero())
524     return false;
525 
526   // Do target-specific constant optimization.
527   if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
528     return TLO.New.getNode();
529 
530   // FIXME: ISD::SELECT, ISD::SELECT_CC
531   switch (Opcode) {
532   default:
533     break;
534   case ISD::XOR:
535   case ISD::AND:
536   case ISD::OR: {
537     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
538     if (!Op1C || Op1C->isOpaque())
539       return false;
540 
541     // If this is a 'not' op, don't touch it because that's a canonical form.
542     const APInt &C = Op1C->getAPIntValue();
543     if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
544       return false;
545 
546     if (!C.isSubsetOf(DemandedBits)) {
547       EVT VT = Op.getValueType();
548       SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
549       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
550                                       Op->getFlags());
551       return TLO.CombineTo(Op, NewOp);
552     }
553 
554     break;
555   }
556   }
557 
558   return false;
559 }
560 
561 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
562                                             const APInt &DemandedBits,
563                                             TargetLoweringOpt &TLO) const {
564   EVT VT = Op.getValueType();
565   APInt DemandedElts = VT.isVector()
566                            ? APInt::getAllOnes(VT.getVectorNumElements())
567                            : APInt(1, 1);
568   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
569 }
570 
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types,
  // starting at the smallest power of two that can hold the demanded bits.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.
      // Truncate both operands, perform the operation narrow, then widen
      // with ANY_EXTEND — the high bits are undefined, which is fine since
      // they were not demanded.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
619 
620 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
621                                           DAGCombinerInfo &DCI) const {
622   SelectionDAG &DAG = DCI.DAG;
623   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
624                         !DCI.isBeforeLegalizeOps());
625   KnownBits Known;
626 
627   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
628   if (Simplified) {
629     DCI.AddToWorklist(Op.getNode());
630     DCI.CommitTargetLoweringOpt(TLO);
631   }
632   return Simplified;
633 }
634 
635 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
636                                           const APInt &DemandedElts,
637                                           DAGCombinerInfo &DCI) const {
638   SelectionDAG &DAG = DCI.DAG;
639   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
640                         !DCI.isBeforeLegalizeOps());
641   KnownBits Known;
642 
643   bool Simplified =
644       SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
645   if (Simplified) {
646     DCI.AddToWorklist(Op.getNode());
647     DCI.CommitTargetLoweringOpt(TLO);
648   }
649   return Simplified;
650 }
651 
652 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
653                                           KnownBits &Known,
654                                           TargetLoweringOpt &TLO,
655                                           unsigned Depth,
656                                           bool AssumeSingleUse) const {
657   EVT VT = Op.getValueType();
658 
659   // Since the number of lanes in a scalable vector is unknown at compile time,
660   // we track one bit which is implicitly broadcast to all lanes.  This means
661   // that all lanes in a scalable vector are considered demanded.
662   APInt DemandedElts = VT.isFixedLengthVector()
663                            ? APInt::getAllOnes(VT.getVectorNumElements())
664                            : APInt(1, 1);
665   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
666                               AssumeSingleUse);
667 }
668 
669 // TODO: Under what circumstances can we create nodes? Constant folding?
670 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
671     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
672     SelectionDAG &DAG, unsigned Depth) const {
673   EVT VT = Op.getValueType();
674 
675   // Limit search depth.
676   if (Depth >= SelectionDAG::MaxRecursionDepth)
677     return SDValue();
678 
679   // Ignore UNDEFs.
680   if (Op.isUndef())
681     return SDValue();
682 
683   // Not demanding any bits/elts from Op.
684   if (DemandedBits == 0 || DemandedElts == 0)
685     return DAG.getUNDEF(VT);
686 
687   bool IsLE = DAG.getDataLayout().isLittleEndian();
688   unsigned NumElts = DemandedElts.getBitWidth();
689   unsigned BitWidth = DemandedBits.getBitWidth();
690   KnownBits LHSKnown, RHSKnown;
691   switch (Op.getOpcode()) {
692   case ISD::BITCAST: {
693     if (VT.isScalableVector())
694       return SDValue();
695 
696     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
697     EVT SrcVT = Src.getValueType();
698     EVT DstVT = Op.getValueType();
699     if (SrcVT == DstVT)
700       return Src;
701 
702     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
703     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
704     if (NumSrcEltBits == NumDstEltBits)
705       if (SDValue V = SimplifyMultipleUseDemandedBits(
706               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
707         return DAG.getBitcast(DstVT, V);
708 
709     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
710       unsigned Scale = NumDstEltBits / NumSrcEltBits;
711       unsigned NumSrcElts = SrcVT.getVectorNumElements();
712       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
713       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
714       for (unsigned i = 0; i != Scale; ++i) {
715         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
716         unsigned BitOffset = EltOffset * NumSrcEltBits;
717         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
718         if (!Sub.isZero()) {
719           DemandedSrcBits |= Sub;
720           for (unsigned j = 0; j != NumElts; ++j)
721             if (DemandedElts[j])
722               DemandedSrcElts.setBit((j * Scale) + i);
723         }
724       }
725 
726       if (SDValue V = SimplifyMultipleUseDemandedBits(
727               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
728         return DAG.getBitcast(DstVT, V);
729     }
730 
731     // TODO - bigendian once we have test coverage.
732     if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
733       unsigned Scale = NumSrcEltBits / NumDstEltBits;
734       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
735       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
736       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
737       for (unsigned i = 0; i != NumElts; ++i)
738         if (DemandedElts[i]) {
739           unsigned Offset = (i % Scale) * NumDstEltBits;
740           DemandedSrcBits.insertBits(DemandedBits, Offset);
741           DemandedSrcElts.setBit(i / Scale);
742         }
743 
744       if (SDValue V = SimplifyMultipleUseDemandedBits(
745               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
746         return DAG.getBitcast(DstVT, V);
747     }
748 
749     break;
750   }
751   case ISD::FREEZE: {
752     SDValue N0 = Op.getOperand(0);
753     if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
754                                              /*PoisonOnly=*/false))
755       return N0;
756     break;
757   }
758   case ISD::AND: {
759     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761 
762     // If all of the demanded bits are known 1 on one side, return the other.
763     // These bits cannot contribute to the result of the 'and' in this
764     // context.
765     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
766       return Op.getOperand(0);
767     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
768       return Op.getOperand(1);
769     break;
770   }
771   case ISD::OR: {
772     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774 
775     // If all of the demanded bits are known zero on one side, return the
776     // other.  These bits cannot contribute to the result of the 'or' in this
777     // context.
778     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
779       return Op.getOperand(0);
780     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
781       return Op.getOperand(1);
782     break;
783   }
784   case ISD::XOR: {
785     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
786     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
787 
788     // If all of the demanded bits are known zero on one side, return the
789     // other.
790     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
791       return Op.getOperand(0);
792     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
793       return Op.getOperand(1);
794     break;
795   }
796   case ISD::SHL: {
797     // If we are only demanding sign bits then we can use the shift source
798     // directly.
799     if (std::optional<uint64_t> MaxSA =
800             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
801       SDValue Op0 = Op.getOperand(0);
802       unsigned ShAmt = *MaxSA;
803       unsigned NumSignBits =
804           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
805       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
806       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
807         return Op0;
808     }
809     break;
810   }
811   case ISD::SRL: {
812     // If we are only demanding sign bits then we can use the shift source
813     // directly.
814     if (std::optional<uint64_t> MaxSA =
815             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
816       SDValue Op0 = Op.getOperand(0);
817       unsigned ShAmt = *MaxSA;
818       // Must already be signbits in DemandedBits bounds, and can't demand any
819       // shifted in zeroes.
820       if (DemandedBits.countl_zero() >= ShAmt) {
821         unsigned NumSignBits =
822             DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
823         if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
824           return Op0;
825       }
826     }
827     break;
828   }
829   case ISD::SETCC: {
830     SDValue Op0 = Op.getOperand(0);
831     SDValue Op1 = Op.getOperand(1);
832     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
833     // If (1) we only need the sign-bit, (2) the setcc operands are the same
834     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
835     // -1, we may be able to bypass the setcc.
836     if (DemandedBits.isSignMask() &&
837         Op0.getScalarValueSizeInBits() == BitWidth &&
838         getBooleanContents(Op0.getValueType()) ==
839             BooleanContent::ZeroOrNegativeOneBooleanContent) {
840       // If we're testing X < 0, then this compare isn't needed - just use X!
841       // FIXME: We're limiting to integer types here, but this should also work
842       // if we don't care about FP signed-zero. The use of SETLT with FP means
843       // that we don't care about NaNs.
844       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
845           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
846         return Op0;
847     }
848     break;
849   }
850   case ISD::SIGN_EXTEND_INREG: {
851     // If none of the extended bits are demanded, eliminate the sextinreg.
852     SDValue Op0 = Op.getOperand(0);
853     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
854     unsigned ExBits = ExVT.getScalarSizeInBits();
855     if (DemandedBits.getActiveBits() <= ExBits &&
856         shouldRemoveRedundantExtend(Op))
857       return Op0;
858     // If the input is already sign extended, just drop the extension.
859     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
860     if (NumSignBits >= (BitWidth - ExBits + 1))
861       return Op0;
862     break;
863   }
864   case ISD::ANY_EXTEND_VECTOR_INREG:
865   case ISD::SIGN_EXTEND_VECTOR_INREG:
866   case ISD::ZERO_EXTEND_VECTOR_INREG: {
867     if (VT.isScalableVector())
868       return SDValue();
869 
870     // If we only want the lowest element and none of extended bits, then we can
871     // return the bitcasted source vector.
872     SDValue Src = Op.getOperand(0);
873     EVT SrcVT = Src.getValueType();
874     EVT DstVT = Op.getValueType();
875     if (IsLE && DemandedElts == 1 &&
876         DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
877         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
878       return DAG.getBitcast(DstVT, Src);
879     }
880     break;
881   }
882   case ISD::INSERT_VECTOR_ELT: {
883     if (VT.isScalableVector())
884       return SDValue();
885 
886     // If we don't demand the inserted element, return the base vector.
887     SDValue Vec = Op.getOperand(0);
888     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
889     EVT VecVT = Vec.getValueType();
890     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
891         !DemandedElts[CIdx->getZExtValue()])
892       return Vec;
893     break;
894   }
895   case ISD::INSERT_SUBVECTOR: {
896     if (VT.isScalableVector())
897       return SDValue();
898 
899     SDValue Vec = Op.getOperand(0);
900     SDValue Sub = Op.getOperand(1);
901     uint64_t Idx = Op.getConstantOperandVal(2);
902     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
903     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
904     // If we don't demand the inserted subvector, return the base vector.
905     if (DemandedSubElts == 0)
906       return Vec;
907     break;
908   }
909   case ISD::VECTOR_SHUFFLE: {
910     assert(!VT.isScalableVector());
911     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
912 
913     // If all the demanded elts are from one operand and are inline,
914     // then we can use the operand directly.
915     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
916     for (unsigned i = 0; i != NumElts; ++i) {
917       int M = ShuffleMask[i];
918       if (M < 0 || !DemandedElts[i])
919         continue;
920       AllUndef = false;
921       IdentityLHS &= (M == (int)i);
922       IdentityRHS &= ((M - NumElts) == i);
923     }
924 
925     if (AllUndef)
926       return DAG.getUNDEF(Op.getValueType());
927     if (IdentityLHS)
928       return Op.getOperand(0);
929     if (IdentityRHS)
930       return Op.getOperand(1);
931     break;
932   }
933   default:
934     // TODO: Probably okay to remove after audit; here to reduce change size
935     // in initial enablement patch for scalable vectors
936     if (VT.isScalableVector())
937       return SDValue();
938 
939     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
940       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
941               Op, DemandedBits, DemandedElts, DAG, Depth))
942         return V;
943     break;
944   }
945   return SDValue();
946 }
947 
948 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
949     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
950     unsigned Depth) const {
951   EVT VT = Op.getValueType();
952   // Since the number of lanes in a scalable vector is unknown at compile time,
953   // we track one bit which is implicitly broadcast to all lanes.  This means
954   // that all lanes in a scalable vector are considered demanded.
955   APInt DemandedElts = VT.isFixedLengthVector()
956                            ? APInt::getAllOnes(VT.getVectorNumElements())
957                            : APInt(1, 1);
958   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
959                                          Depth);
960 }
961 
962 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
963     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
964     unsigned Depth) const {
965   APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
966   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
967                                          Depth);
968 }
969 
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// On success returns the (possibly extended/truncated) AVG node replacing Op;
// returns an empty SDValue if the pattern doesn't match or the transform is
// not profitable/legal.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  // ExtOpA/ExtOpB start as the two operands of the outer add; for the avgceil
  // forms MatchOperands rewrites them (and Add2) as a side effect.
  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  // Given the three non-shift operands (Op1 plus the inner add's two operands
  // Op2/Op3, in either assignment), succeed iff one of Op2/Op3 is the
  // constant 1; the remaining two become the AVG operands and A records the
  // inner add for the later overflow check.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Try the +1 on either side of the outer add; short-circuiting keeps the
  // first successful match's side effects.
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  // -1 below: the add of two values with N sign bits has at least N-1 sign
  // bits; same reasoning applies to the leading-zero count via min().
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Decide between a signed or unsigned AVG; KnownBits records how many high
  // bits are redundant, which bounds the narrow type chosen below. Prefer the
  // unsigned form when it proves strictly more redundant high bits.
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // srl of a sign-extended add is only an AVGS if the caller never looks at
    // the sign bit (which srl would have zeroed but sra keeps).
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    // At the original width the fold is only sound if neither add (outer, and
    // inner for the ceil forms) can overflow.
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create a AVGFLOOR node with a scalar constant unless its legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  // Build the AVG at the narrow type and extend/truncate back to VT, using
  // sign- or zero-extension to match the chosen signedness.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1111 
1112 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1113 /// result of Op are ever used downstream. If we can use this information to
1114 /// simplify Op, create a new simplified DAG node and return true, returning the
1115 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1116 /// return a mask of Known bits for the expression (used to simplify the
1117 /// caller).  The Known bits may only be accurate for those bits in the
1118 /// OriginalDemandedBits and OriginalDemandedElts.
1119 bool TargetLowering::SimplifyDemandedBits(
1120     SDValue Op, const APInt &OriginalDemandedBits,
1121     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1122     unsigned Depth, bool AssumeSingleUse) const {
1123   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1124   assert(Op.getScalarValueSizeInBits() == BitWidth &&
1125          "Mask size mismatches value type size!");
1126 
1127   // Don't know anything.
1128   Known = KnownBits(BitWidth);
1129 
1130   EVT VT = Op.getValueType();
1131   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1132   unsigned NumElts = OriginalDemandedElts.getBitWidth();
1133   assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1134          "Unexpected vector size");
1135 
1136   APInt DemandedBits = OriginalDemandedBits;
1137   APInt DemandedElts = OriginalDemandedElts;
1138   SDLoc dl(Op);
1139 
1140   // Undef operand.
1141   if (Op.isUndef())
1142     return false;
1143 
1144   // We can't simplify target constants.
1145   if (Op.getOpcode() == ISD::TargetConstant)
1146     return false;
1147 
1148   if (Op.getOpcode() == ISD::Constant) {
1149     // We know all of the bits for a constant!
1150     Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1151     return false;
1152   }
1153 
1154   if (Op.getOpcode() == ISD::ConstantFP) {
1155     // We know all of the bits for a floating point constant!
1156     Known = KnownBits::makeConstant(
1157         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1158     return false;
1159   }
1160 
1161   // Other users may use these bits.
1162   bool HasMultiUse = false;
1163   if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1164     if (Depth >= SelectionDAG::MaxRecursionDepth) {
1165       // Limit search depth.
1166       return false;
1167     }
1168     // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1169     DemandedBits = APInt::getAllOnes(BitWidth);
1170     DemandedElts = APInt::getAllOnes(NumElts);
1171     HasMultiUse = true;
1172   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1173     // Not demanding any bits/elts from Op.
1174     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1175   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1176     // Limit search depth.
1177     return false;
1178   }
1179 
1180   KnownBits Known2;
1181   switch (Op.getOpcode()) {
1182   case ISD::SCALAR_TO_VECTOR: {
1183     if (VT.isScalableVector())
1184       return false;
1185     if (!DemandedElts[0])
1186       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1187 
1188     KnownBits SrcKnown;
1189     SDValue Src = Op.getOperand(0);
1190     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1191     APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1192     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1193       return true;
1194 
1195     // Upper elements are undef, so only get the knownbits if we just demand
1196     // the bottom element.
1197     if (DemandedElts == 1)
1198       Known = SrcKnown.anyextOrTrunc(BitWidth);
1199     break;
1200   }
1201   case ISD::BUILD_VECTOR:
1202     // Collect the known bits that are shared by every demanded element.
1203     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1204     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1205     return false; // Don't fall through, will infinitely loop.
1206   case ISD::SPLAT_VECTOR: {
1207     SDValue Scl = Op.getOperand(0);
1208     APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1209     KnownBits KnownScl;
1210     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1211       return true;
1212 
1213     // Implicitly truncate the bits to match the official semantics of
1214     // SPLAT_VECTOR.
1215     Known = KnownScl.trunc(BitWidth);
1216     break;
1217   }
1218   case ISD::LOAD: {
1219     auto *LD = cast<LoadSDNode>(Op);
1220     if (getTargetConstantFromLoad(LD)) {
1221       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1222       return false; // Don't fall through, will infinitely loop.
1223     }
1224     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1225       // If this is a ZEXTLoad and we are looking at the loaded value.
1226       EVT MemVT = LD->getMemoryVT();
1227       unsigned MemBits = MemVT.getScalarSizeInBits();
1228       Known.Zero.setBitsFrom(MemBits);
1229       return false; // Don't fall through, will infinitely loop.
1230     }
1231     break;
1232   }
1233   case ISD::INSERT_VECTOR_ELT: {
1234     if (VT.isScalableVector())
1235       return false;
1236     SDValue Vec = Op.getOperand(0);
1237     SDValue Scl = Op.getOperand(1);
1238     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1239     EVT VecVT = Vec.getValueType();
1240 
1241     // If index isn't constant, assume we need all vector elements AND the
1242     // inserted element.
1243     APInt DemandedVecElts(DemandedElts);
1244     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1245       unsigned Idx = CIdx->getZExtValue();
1246       DemandedVecElts.clearBit(Idx);
1247 
1248       // Inserted element is not required.
1249       if (!DemandedElts[Idx])
1250         return TLO.CombineTo(Op, Vec);
1251     }
1252 
1253     KnownBits KnownScl;
1254     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1255     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1256     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1257       return true;
1258 
1259     Known = KnownScl.anyextOrTrunc(BitWidth);
1260 
1261     KnownBits KnownVec;
1262     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1263                              Depth + 1))
1264       return true;
1265 
1266     if (!!DemandedVecElts)
1267       Known = Known.intersectWith(KnownVec);
1268 
1269     return false;
1270   }
1271   case ISD::INSERT_SUBVECTOR: {
1272     if (VT.isScalableVector())
1273       return false;
1274     // Demand any elements from the subvector and the remainder from the src its
1275     // inserted into.
1276     SDValue Src = Op.getOperand(0);
1277     SDValue Sub = Op.getOperand(1);
1278     uint64_t Idx = Op.getConstantOperandVal(2);
1279     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1280     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1281     APInt DemandedSrcElts = DemandedElts;
1282     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1283 
1284     KnownBits KnownSub, KnownSrc;
1285     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1286                              Depth + 1))
1287       return true;
1288     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1289                              Depth + 1))
1290       return true;
1291 
1292     Known.Zero.setAllBits();
1293     Known.One.setAllBits();
1294     if (!!DemandedSubElts)
1295       Known = Known.intersectWith(KnownSub);
1296     if (!!DemandedSrcElts)
1297       Known = Known.intersectWith(KnownSrc);
1298 
1299     // Attempt to avoid multi-use src if we don't need anything from it.
1300     if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1301         !DemandedSrcElts.isAllOnes()) {
1302       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1303           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1304       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1305           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1306       if (NewSub || NewSrc) {
1307         NewSub = NewSub ? NewSub : Sub;
1308         NewSrc = NewSrc ? NewSrc : Src;
1309         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1310                                         Op.getOperand(2));
1311         return TLO.CombineTo(Op, NewOp);
1312       }
1313     }
1314     break;
1315   }
1316   case ISD::EXTRACT_SUBVECTOR: {
1317     if (VT.isScalableVector())
1318       return false;
1319     // Offset the demanded elts by the subvector index.
1320     SDValue Src = Op.getOperand(0);
1321     if (Src.getValueType().isScalableVector())
1322       break;
1323     uint64_t Idx = Op.getConstantOperandVal(1);
1324     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1325     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1326 
1327     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1328                              Depth + 1))
1329       return true;
1330 
1331     // Attempt to avoid multi-use src if we don't need anything from it.
1332     if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1333       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1334           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1335       if (DemandedSrc) {
1336         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1337                                         Op.getOperand(1));
1338         return TLO.CombineTo(Op, NewOp);
1339       }
1340     }
1341     break;
1342   }
1343   case ISD::CONCAT_VECTORS: {
1344     if (VT.isScalableVector())
1345       return false;
1346     Known.Zero.setAllBits();
1347     Known.One.setAllBits();
1348     EVT SubVT = Op.getOperand(0).getValueType();
1349     unsigned NumSubVecs = Op.getNumOperands();
1350     unsigned NumSubElts = SubVT.getVectorNumElements();
1351     for (unsigned i = 0; i != NumSubVecs; ++i) {
1352       APInt DemandedSubElts =
1353           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1354       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1355                                Known2, TLO, Depth + 1))
1356         return true;
1357       // Known bits are shared by every demanded subvector element.
1358       if (!!DemandedSubElts)
1359         Known = Known.intersectWith(Known2);
1360     }
1361     break;
1362   }
1363   case ISD::VECTOR_SHUFFLE: {
1364     assert(!VT.isScalableVector());
1365     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1366 
1367     // Collect demanded elements from shuffle operands..
1368     APInt DemandedLHS, DemandedRHS;
1369     if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1370                                 DemandedRHS))
1371       break;
1372 
1373     if (!!DemandedLHS || !!DemandedRHS) {
1374       SDValue Op0 = Op.getOperand(0);
1375       SDValue Op1 = Op.getOperand(1);
1376 
1377       Known.Zero.setAllBits();
1378       Known.One.setAllBits();
1379       if (!!DemandedLHS) {
1380         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1381                                  Depth + 1))
1382           return true;
1383         Known = Known.intersectWith(Known2);
1384       }
1385       if (!!DemandedRHS) {
1386         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1387                                  Depth + 1))
1388           return true;
1389         Known = Known.intersectWith(Known2);
1390       }
1391 
1392       // Attempt to avoid multi-use ops if we don't need anything from them.
1393       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1394           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1395       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1396           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1397       if (DemandedOp0 || DemandedOp1) {
1398         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1399         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1400         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1401         return TLO.CombineTo(Op, NewOp);
1402       }
1403     }
1404     break;
1405   }
1406   case ISD::AND: {
1407     SDValue Op0 = Op.getOperand(0);
1408     SDValue Op1 = Op.getOperand(1);
1409 
1410     // If the RHS is a constant, check to see if the LHS would be zero without
1411     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1412     // simplify the LHS, here we're using information from the LHS to simplify
1413     // the RHS.
1414     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1415       // Do not increment Depth here; that can cause an infinite loop.
1416       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1417       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1418       if ((LHSKnown.Zero & DemandedBits) ==
1419           (~RHSC->getAPIntValue() & DemandedBits))
1420         return TLO.CombineTo(Op, Op0);
1421 
1422       // If any of the set bits in the RHS are known zero on the LHS, shrink
1423       // the constant.
1424       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1425                                  DemandedElts, TLO))
1426         return true;
1427 
1428       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1429       // constant, but if this 'and' is only clearing bits that were just set by
1430       // the xor, then this 'and' can be eliminated by shrinking the mask of
1431       // the xor. For example, for a 32-bit X:
1432       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1433       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1434           LHSKnown.One == ~RHSC->getAPIntValue()) {
1435         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1436         return TLO.CombineTo(Op, Xor);
1437       }
1438     }
1439 
1440     // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1441     // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1442     if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1443         (Op0.getOperand(0).isUndef() ||
1444          ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1445         Op0->hasOneUse()) {
1446       unsigned NumSubElts =
1447           Op0.getOperand(1).getValueType().getVectorNumElements();
1448       unsigned SubIdx = Op0.getConstantOperandVal(2);
1449       APInt DemandedSub =
1450           APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1451       KnownBits KnownSubMask =
1452           TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1453       if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1454         SDValue NewAnd =
1455             TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1456         SDValue NewInsert =
1457             TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1458                             Op0.getOperand(1), Op0.getOperand(2));
1459         return TLO.CombineTo(Op, NewInsert);
1460       }
1461     }
1462 
1463     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1464                              Depth + 1))
1465       return true;
1466     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1467                              Known2, TLO, Depth + 1))
1468       return true;
1469 
1470     // If all of the demanded bits are known one on one side, return the other.
1471     // These bits cannot contribute to the result of the 'and'.
1472     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1473       return TLO.CombineTo(Op, Op0);
1474     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1475       return TLO.CombineTo(Op, Op1);
1476     // If all of the demanded bits in the inputs are known zeros, return zero.
1477     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1478       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1479     // If the RHS is a constant, see if we can simplify it.
1480     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1481                                TLO))
1482       return true;
1483     // If the operation can be done in a smaller type, do so.
1484     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1485       return true;
1486 
1487     // Attempt to avoid multi-use ops if we don't need anything from them.
1488     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1489       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1490           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1491       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1492           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1493       if (DemandedOp0 || DemandedOp1) {
1494         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1495         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1496         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1497         return TLO.CombineTo(Op, NewOp);
1498       }
1499     }
1500 
1501     Known &= Known2;
1502     break;
1503   }
1504   case ISD::OR: {
1505     SDValue Op0 = Op.getOperand(0);
1506     SDValue Op1 = Op.getOperand(1);
1507     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1508                              Depth + 1)) {
1509       Op->dropFlags(SDNodeFlags::Disjoint);
1510       return true;
1511     }
1512 
1513     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1514                              Known2, TLO, Depth + 1)) {
1515       Op->dropFlags(SDNodeFlags::Disjoint);
1516       return true;
1517     }
1518 
1519     // If all of the demanded bits are known zero on one side, return the other.
1520     // These bits cannot contribute to the result of the 'or'.
1521     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1522       return TLO.CombineTo(Op, Op0);
1523     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1524       return TLO.CombineTo(Op, Op1);
1525     // If the RHS is a constant, see if we can simplify it.
1526     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1527       return true;
1528     // If the operation can be done in a smaller type, do so.
1529     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1530       return true;
1531 
1532     // Attempt to avoid multi-use ops if we don't need anything from them.
1533     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1534       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1535           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1536       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1537           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1538       if (DemandedOp0 || DemandedOp1) {
1539         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1540         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1541         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1542         return TLO.CombineTo(Op, NewOp);
1543       }
1544     }
1545 
1546     // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1547     // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1548     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1549         Op0->hasOneUse() && Op1->hasOneUse()) {
1550       // Attempt to match all commutations - m_c_Or would've been useful!
1551       for (int I = 0; I != 2; ++I) {
1552         SDValue X = Op.getOperand(I).getOperand(0);
1553         SDValue C1 = Op.getOperand(I).getOperand(1);
1554         SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1555         SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1556         if (Alt.getOpcode() == ISD::OR) {
1557           for (int J = 0; J != 2; ++J) {
1558             if (X == Alt.getOperand(J)) {
1559               SDValue Y = Alt.getOperand(1 - J);
1560               if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1561                                                                {C1, C2})) {
1562                 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1563                 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1564                 return TLO.CombineTo(
1565                     Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1566               }
1567             }
1568           }
1569         }
1570       }
1571     }
1572 
1573     Known |= Known2;
1574     break;
1575   }
1576   case ISD::XOR: {
1577     SDValue Op0 = Op.getOperand(0);
1578     SDValue Op1 = Op.getOperand(1);
1579 
1580     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1581                              Depth + 1))
1582       return true;
1583     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1584                              Depth + 1))
1585       return true;
1586 
1587     // If all of the demanded bits are known zero on one side, return the other.
1588     // These bits cannot contribute to the result of the 'xor'.
1589     if (DemandedBits.isSubsetOf(Known.Zero))
1590       return TLO.CombineTo(Op, Op0);
1591     if (DemandedBits.isSubsetOf(Known2.Zero))
1592       return TLO.CombineTo(Op, Op1);
1593     // If the operation can be done in a smaller type, do so.
1594     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1595       return true;
1596 
1597     // If all of the unknown bits are known to be zero on one side or the other
1598     // turn this into an *inclusive* or.
1599     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1600     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1601       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1602 
1603     ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1604     if (C) {
1605       // If one side is a constant, and all of the set bits in the constant are
1606       // also known set on the other side, turn this into an AND, as we know
1607       // the bits will be cleared.
1608       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1609       // NB: it is okay if more bits are known than are requested
1610       if (C->getAPIntValue() == Known2.One) {
1611         SDValue ANDC =
1612             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1613         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1614       }
1615 
1616       // If the RHS is a constant, see if we can change it. Don't alter a -1
1617       // constant because that's a 'not' op, and that is better for combining
1618       // and codegen.
1619       if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1620         // We're flipping all demanded bits. Flip the undemanded bits too.
1621         SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1622         return TLO.CombineTo(Op, New);
1623       }
1624 
1625       unsigned Op0Opcode = Op0.getOpcode();
1626       if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1627         if (ConstantSDNode *ShiftC =
1628                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1629           // Don't crash on an oversized shift. We can not guarantee that a
1630           // bogus shift has been simplified to undef.
1631           if (ShiftC->getAPIntValue().ult(BitWidth)) {
1632             uint64_t ShiftAmt = ShiftC->getZExtValue();
1633             APInt Ones = APInt::getAllOnes(BitWidth);
1634             Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1635                                          : Ones.lshr(ShiftAmt);
1636             if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1637                 isDesirableToCommuteXorWithShift(Op.getNode())) {
1638               // If the xor constant is a demanded mask, do a 'not' before the
1639               // shift:
1640               // xor (X << ShiftC), XorC --> (not X) << ShiftC
1641               // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1642               SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1643               return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1644                                                        Op0.getOperand(1)));
1645             }
1646           }
1647         }
1648       }
1649     }
1650 
1651     // If we can't turn this into a 'not', try to shrink the constant.
1652     if (!C || !C->isAllOnes())
1653       if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1654         return true;
1655 
1656     // Attempt to avoid multi-use ops if we don't need anything from them.
1657     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1658       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1659           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1660       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1661           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1662       if (DemandedOp0 || DemandedOp1) {
1663         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1664         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1665         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1666         return TLO.CombineTo(Op, NewOp);
1667       }
1668     }
1669 
1670     Known ^= Known2;
1671     break;
1672   }
1673   case ISD::SELECT:
1674     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1675                              Known, TLO, Depth + 1))
1676       return true;
1677     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1678                              Known2, TLO, Depth + 1))
1679       return true;
1680 
1681     // If the operands are constants, see if we can simplify them.
1682     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1683       return true;
1684 
1685     // Only known if known in both the LHS and RHS.
1686     Known = Known.intersectWith(Known2);
1687     break;
1688   case ISD::VSELECT:
1689     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1690                              Known, TLO, Depth + 1))
1691       return true;
1692     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1693                              Known2, TLO, Depth + 1))
1694       return true;
1695 
1696     // Only known if known in both the LHS and RHS.
1697     Known = Known.intersectWith(Known2);
1698     break;
1699   case ISD::SELECT_CC:
1700     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1701                              Known, TLO, Depth + 1))
1702       return true;
1703     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1704                              Known2, TLO, Depth + 1))
1705       return true;
1706 
1707     // If the operands are constants, see if we can simplify them.
1708     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1709       return true;
1710 
1711     // Only known if known in both the LHS and RHS.
1712     Known = Known.intersectWith(Known2);
1713     break;
1714   case ISD::SETCC: {
1715     SDValue Op0 = Op.getOperand(0);
1716     SDValue Op1 = Op.getOperand(1);
1717     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1718     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1719     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1720     // -1, we may be able to bypass the setcc.
1721     if (DemandedBits.isSignMask() &&
1722         Op0.getScalarValueSizeInBits() == BitWidth &&
1723         getBooleanContents(Op0.getValueType()) ==
1724             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1725       // If we're testing X < 0, then this compare isn't needed - just use X!
1726       // FIXME: We're limiting to integer types here, but this should also work
1727       // if we don't care about FP signed-zero. The use of SETLT with FP means
1728       // that we don't care about NaNs.
1729       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1730           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1731         return TLO.CombineTo(Op, Op0);
1732 
1733       // TODO: Should we check for other forms of sign-bit comparisons?
1734       // Examples: X <= -1, X >= 0
1735     }
1736     if (getBooleanContents(Op0.getValueType()) ==
1737             TargetLowering::ZeroOrOneBooleanContent &&
1738         BitWidth > 1)
1739       Known.Zero.setBitsFrom(1);
1740     break;
1741   }
1742   case ISD::SHL: {
1743     SDValue Op0 = Op.getOperand(0);
1744     SDValue Op1 = Op.getOperand(1);
1745     EVT ShiftVT = Op1.getValueType();
1746 
1747     if (std::optional<uint64_t> KnownSA =
1748             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1749       unsigned ShAmt = *KnownSA;
1750       if (ShAmt == 0)
1751         return TLO.CombineTo(Op, Op0);
1752 
1753       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1754       // single shift.  We can do this if the bottom bits (which are shifted
1755       // out) are never demanded.
1756       // TODO - support non-uniform vector amounts.
1757       if (Op0.getOpcode() == ISD::SRL) {
1758         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1759           if (std::optional<uint64_t> InnerSA =
1760                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1761             unsigned C1 = *InnerSA;
1762             unsigned Opc = ISD::SHL;
1763             int Diff = ShAmt - C1;
1764             if (Diff < 0) {
1765               Diff = -Diff;
1766               Opc = ISD::SRL;
1767             }
1768             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1769             return TLO.CombineTo(
1770                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1771           }
1772         }
1773       }
1774 
1775       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1776       // are not demanded. This will likely allow the anyext to be folded away.
1777       // TODO - support non-uniform vector amounts.
1778       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1779         SDValue InnerOp = Op0.getOperand(0);
1780         EVT InnerVT = InnerOp.getValueType();
1781         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1782         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1783             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1784           SDValue NarrowShl = TLO.DAG.getNode(
1785               ISD::SHL, dl, InnerVT, InnerOp,
1786               TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1787           return TLO.CombineTo(
1788               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1789         }
1790 
1791         // Repeat the SHL optimization above in cases where an extension
1792         // intervenes: (shl (anyext (shr x, c1)), c2) to
1793         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1794         // aren't demanded (as above) and that the shifted upper c1 bits of
1795         // x aren't demanded.
1796         // TODO - support non-uniform vector amounts.
1797         if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1798             InnerOp.hasOneUse()) {
1799           if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1800                   InnerOp, DemandedElts, Depth + 2)) {
1801             unsigned InnerShAmt = *SA2;
1802             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1803                 DemandedBits.getActiveBits() <=
1804                     (InnerBits - InnerShAmt + ShAmt) &&
1805                 DemandedBits.countr_zero() >= ShAmt) {
1806               SDValue NewSA =
1807                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1808               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1809                                                InnerOp.getOperand(0));
1810               return TLO.CombineTo(
1811                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1812             }
1813           }
1814         }
1815       }
1816 
1817       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1818       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1819                                Depth + 1)) {
1820         // Disable the nsw and nuw flags. We can no longer guarantee that we
1821         // won't wrap after simplification.
1822         Op->dropFlags(SDNodeFlags::NoWrap);
1823         return true;
1824       }
1825       Known.Zero <<= ShAmt;
1826       Known.One <<= ShAmt;
1827       // low bits known zero.
1828       Known.Zero.setLowBits(ShAmt);
1829 
1830       // Attempt to avoid multi-use ops if we don't need anything from them.
1831       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1832         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1833             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1834         if (DemandedOp0) {
1835           SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1836           return TLO.CombineTo(Op, NewOp);
1837         }
1838       }
1839 
1840       // TODO: Can we merge this fold with the one below?
1841       // Try shrinking the operation as long as the shift amount will still be
1842       // in range.
1843       if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1844           Op.getNode()->hasOneUse()) {
1845         // Search for the smallest integer type with free casts to and from
1846         // Op's type. For expedience, just check power-of-2 integer types.
1847         unsigned DemandedSize = DemandedBits.getActiveBits();
1848         for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1849              SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1850           EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1851           if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1852               isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1853               isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1854               (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1855             assert(DemandedSize <= SmallVTBits &&
1856                    "Narrowed below demanded bits?");
1857             // We found a type with free casts.
1858             SDValue NarrowShl = TLO.DAG.getNode(
1859                 ISD::SHL, dl, SmallVT,
1860                 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1861                 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1862             return TLO.CombineTo(
1863                 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1864           }
1865         }
1866       }
1867 
1868       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1869       // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1870       // Only do this if we demand the upper half so the knownbits are correct.
1871       unsigned HalfWidth = BitWidth / 2;
1872       if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1873           DemandedBits.countLeadingOnes() >= HalfWidth) {
1874         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1875         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1876             isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1877             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1878             (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1879           // If we're demanding the upper bits at all, we must ensure
1880           // that the upper bits of the shift result are known to be zero,
1881           // which is equivalent to the narrow shift being NUW.
1882           if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1883             bool IsNSW = Known.countMinSignBits() > HalfWidth;
1884             SDNodeFlags Flags;
1885             Flags.setNoSignedWrap(IsNSW);
1886             Flags.setNoUnsignedWrap(IsNUW);
1887             SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1888             SDValue NewShiftAmt =
1889                 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1890             SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1891                                                NewShiftAmt, Flags);
1892             SDValue NewExt =
1893                 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1894             return TLO.CombineTo(Op, NewExt);
1895           }
1896         }
1897       }
1898     } else {
1899       // This is a variable shift, so we can't shift the demand mask by a known
1900       // amount. But if we are not demanding high bits, then we are not
1901       // demanding those bits from the pre-shifted operand either.
1902       if (unsigned CTLZ = DemandedBits.countl_zero()) {
1903         APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1904         if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1905                                  Depth + 1)) {
1906           // Disable the nsw and nuw flags. We can no longer guarantee that we
1907           // won't wrap after simplification.
1908           Op->dropFlags(SDNodeFlags::NoWrap);
1909           return true;
1910         }
1911         Known.resetAll();
1912       }
1913     }
1914 
1915     // If we are only demanding sign bits then we can use the shift source
1916     // directly.
1917     if (std::optional<uint64_t> MaxSA =
1918             TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1919       unsigned ShAmt = *MaxSA;
1920       unsigned NumSignBits =
1921           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1922       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1923       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1924         return TLO.CombineTo(Op, Op0);
1925     }
1926     break;
1927   }
1928   case ISD::SRL: {
1929     SDValue Op0 = Op.getOperand(0);
1930     SDValue Op1 = Op.getOperand(1);
1931     EVT ShiftVT = Op1.getValueType();
1932 
1933     if (std::optional<uint64_t> KnownSA =
1934             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1935       unsigned ShAmt = *KnownSA;
1936       if (ShAmt == 0)
1937         return TLO.CombineTo(Op, Op0);
1938 
1939       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1940       // single shift.  We can do this if the top bits (which are shifted out)
1941       // are never demanded.
1942       // TODO - support non-uniform vector amounts.
1943       if (Op0.getOpcode() == ISD::SHL) {
1944         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1945           if (std::optional<uint64_t> InnerSA =
1946                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1947             unsigned C1 = *InnerSA;
1948             unsigned Opc = ISD::SRL;
1949             int Diff = ShAmt - C1;
1950             if (Diff < 0) {
1951               Diff = -Diff;
1952               Opc = ISD::SHL;
1953             }
1954             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1955             return TLO.CombineTo(
1956                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1957           }
1958         }
1959       }
1960 
1961       // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1962       // single sra. We can do this if the top bits are never demanded.
1963       if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1964         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1965           if (std::optional<uint64_t> InnerSA =
1966                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1967             unsigned C1 = *InnerSA;
1968             // Clamp the combined shift amount if it exceeds the bit width.
1969             unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1970             SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1971             return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1972                                                      Op0.getOperand(0), NewSA));
1973           }
1974         }
1975       }
1976 
1977       APInt InDemandedMask = (DemandedBits << ShAmt);
1978 
1979       // If the shift is exact, then it does demand the low bits (and knows that
1980       // they are zero).
1981       if (Op->getFlags().hasExact())
1982         InDemandedMask.setLowBits(ShAmt);
1983 
1984       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1985       // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1986       if ((BitWidth % 2) == 0 && !VT.isVector()) {
1987         APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
1988         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1989         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1990             isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1991             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1992             (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1993             ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1994              TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1995           SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1996           SDValue NewShiftAmt =
1997               TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1998           SDValue NewShift =
1999               TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2000           return TLO.CombineTo(
2001               Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2002         }
2003       }
2004 
2005       // Compute the new bits that are at the top now.
2006       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2007                                Depth + 1))
2008         return true;
2009       Known.Zero.lshrInPlace(ShAmt);
2010       Known.One.lshrInPlace(ShAmt);
2011       // High bits known zero.
2012       Known.Zero.setHighBits(ShAmt);
2013 
2014       // Attempt to avoid multi-use ops if we don't need anything from them.
2015       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2016         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2017             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2018         if (DemandedOp0) {
2019           SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2020           return TLO.CombineTo(Op, NewOp);
2021         }
2022       }
2023     } else {
2024       // Use generic knownbits computation as it has support for non-uniform
2025       // shift amounts.
2026       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2027     }
2028 
2029     // Try to match AVG patterns (after shift simplification).
2030     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2031                                         DemandedElts, Depth + 1))
2032       return TLO.CombineTo(Op, AVG);
2033 
2034     break;
2035   }
2036   case ISD::SRA: {
2037     SDValue Op0 = Op.getOperand(0);
2038     SDValue Op1 = Op.getOperand(1);
2039     EVT ShiftVT = Op1.getValueType();
2040 
2041     // If we only want bits that already match the signbit then we don't need
2042     // to shift.
2043     unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2044     if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2045         NumHiDemandedBits)
2046       return TLO.CombineTo(Op, Op0);
2047 
2048     // If this is an arithmetic shift right and only the low-bit is set, we can
2049     // always convert this into a logical shr, even if the shift amount is
2050     // variable.  The low bit of the shift cannot be an input sign bit unless
2051     // the shift amount is >= the size of the datatype, which is undefined.
2052     if (DemandedBits.isOne())
2053       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2054 
2055     if (std::optional<uint64_t> KnownSA =
2056             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2057       unsigned ShAmt = *KnownSA;
2058       if (ShAmt == 0)
2059         return TLO.CombineTo(Op, Op0);
2060 
2061       // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2062       // supports sext_inreg.
2063       if (Op0.getOpcode() == ISD::SHL) {
2064         if (std::optional<uint64_t> InnerSA =
2065                 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2066           unsigned LowBits = BitWidth - ShAmt;
2067           EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2068           if (VT.isVector())
2069             ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2070                                      VT.getVectorElementCount());
2071 
2072           if (*InnerSA == ShAmt) {
2073             if (!TLO.LegalOperations() ||
2074                 getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
2075               return TLO.CombineTo(
2076                   Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2077                                       Op0.getOperand(0),
2078                                       TLO.DAG.getValueType(ExtVT)));
2079 
2080             // Even if we can't convert to sext_inreg, we might be able to
2081             // remove this shift pair if the input is already sign extended.
2082             unsigned NumSignBits =
2083                 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2084             if (NumSignBits > ShAmt)
2085               return TLO.CombineTo(Op, Op0.getOperand(0));
2086           }
2087         }
2088       }
2089 
2090       APInt InDemandedMask = (DemandedBits << ShAmt);
2091 
2092       // If the shift is exact, then it does demand the low bits (and knows that
2093       // they are zero).
2094       if (Op->getFlags().hasExact())
2095         InDemandedMask.setLowBits(ShAmt);
2096 
2097       // If any of the demanded bits are produced by the sign extension, we also
2098       // demand the input sign bit.
2099       if (DemandedBits.countl_zero() < ShAmt)
2100         InDemandedMask.setSignBit();
2101 
2102       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2103                                Depth + 1))
2104         return true;
2105       Known.Zero.lshrInPlace(ShAmt);
2106       Known.One.lshrInPlace(ShAmt);
2107 
2108       // If the input sign bit is known to be zero, or if none of the top bits
2109       // are demanded, turn this into an unsigned shift right.
2110       if (Known.Zero[BitWidth - ShAmt - 1] ||
2111           DemandedBits.countl_zero() >= ShAmt) {
2112         SDNodeFlags Flags;
2113         Flags.setExact(Op->getFlags().hasExact());
2114         return TLO.CombineTo(
2115             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2116       }
2117 
2118       int Log2 = DemandedBits.exactLogBase2();
2119       if (Log2 >= 0) {
2120         // The bit must come from the sign.
2121         SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2122         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2123       }
2124 
2125       if (Known.One[BitWidth - ShAmt - 1])
2126         // New bits are known one.
2127         Known.One.setHighBits(ShAmt);
2128 
2129       // Attempt to avoid multi-use ops if we don't need anything from them.
2130       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2131         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2132             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2133         if (DemandedOp0) {
2134           SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2135           return TLO.CombineTo(Op, NewOp);
2136         }
2137       }
2138     }
2139 
2140     // Try to match AVG patterns (after shift simplification).
2141     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2142                                         DemandedElts, Depth + 1))
2143       return TLO.CombineTo(Op, AVG);
2144 
2145     break;
2146   }
2147   case ISD::FSHL:
2148   case ISD::FSHR: {
2149     SDValue Op0 = Op.getOperand(0);
2150     SDValue Op1 = Op.getOperand(1);
2151     SDValue Op2 = Op.getOperand(2);
2152     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2153 
2154     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2155       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2156 
2157       // For fshl, 0-shift returns the 1st arg.
2158       // For fshr, 0-shift returns the 2nd arg.
2159       if (Amt == 0) {
2160         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2161                                  Known, TLO, Depth + 1))
2162           return true;
2163         break;
2164       }
2165 
2166       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2167       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2168       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2169       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2170       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2171                                Depth + 1))
2172         return true;
2173       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2174                                Depth + 1))
2175         return true;
2176 
2177       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2178       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2179       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2180       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2181       Known = Known.unionWith(Known2);
2182 
2183       // Attempt to avoid multi-use ops if we don't need anything from them.
2184       if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2185           !DemandedElts.isAllOnes()) {
2186         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2187             Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2188         SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2189             Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2190         if (DemandedOp0 || DemandedOp1) {
2191           DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2192           DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2193           SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2194                                           DemandedOp1, Op2);
2195           return TLO.CombineTo(Op, NewOp);
2196         }
2197       }
2198     }
2199 
2200     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2201     if (isPowerOf2_32(BitWidth)) {
2202       APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2203       if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2204                                Known2, TLO, Depth + 1))
2205         return true;
2206     }
2207     break;
2208   }
2209   case ISD::ROTL:
2210   case ISD::ROTR: {
2211     SDValue Op0 = Op.getOperand(0);
2212     SDValue Op1 = Op.getOperand(1);
2213     bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2214 
2215     // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2216     if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2217       return TLO.CombineTo(Op, Op0);
2218 
2219     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2220       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2221       unsigned RevAmt = BitWidth - Amt;
2222 
2223       // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2224       // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2225       APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2226       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2227                                Depth + 1))
2228         return true;
2229 
2230       // rot*(x, 0) --> x
2231       if (Amt == 0)
2232         return TLO.CombineTo(Op, Op0);
2233 
2234       // See if we don't demand either half of the rotated bits.
2235       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2236           DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2237         Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2238         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2239       }
2240       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2241           DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2242         Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2243         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2244       }
2245     }
2246 
2247     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2248     if (isPowerOf2_32(BitWidth)) {
2249       APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2250       if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2251                                Depth + 1))
2252         return true;
2253     }
2254     break;
2255   }
2256   case ISD::SMIN:
2257   case ISD::SMAX:
2258   case ISD::UMIN:
2259   case ISD::UMAX: {
2260     unsigned Opc = Op.getOpcode();
2261     SDValue Op0 = Op.getOperand(0);
2262     SDValue Op1 = Op.getOperand(1);
2263 
2264     // If we're only demanding signbits, then we can simplify to OR/AND node.
2265     unsigned BitOp =
2266         (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2267     unsigned NumSignBits =
2268         std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2269                  TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2270     unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2271     if (NumSignBits >= NumDemandedUpperBits)
2272       return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2273 
2274     // Check if one arg is always less/greater than (or equal) to the other arg.
2275     KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2276     KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2277     switch (Opc) {
2278     case ISD::SMIN:
2279       if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2280         return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2281       if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2282         return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2283       Known = KnownBits::smin(Known0, Known1);
2284       break;
2285     case ISD::SMAX:
2286       if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2287         return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2288       if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2289         return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2290       Known = KnownBits::smax(Known0, Known1);
2291       break;
2292     case ISD::UMIN:
2293       if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2294         return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2295       if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2296         return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2297       Known = KnownBits::umin(Known0, Known1);
2298       break;
2299     case ISD::UMAX:
2300       if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2301         return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2302       if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2303         return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2304       Known = KnownBits::umax(Known0, Known1);
2305       break;
2306     }
2307     break;
2308   }
2309   case ISD::BITREVERSE: {
2310     SDValue Src = Op.getOperand(0);
2311     APInt DemandedSrcBits = DemandedBits.reverseBits();
2312     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2313                              Depth + 1))
2314       return true;
2315     Known.One = Known2.One.reverseBits();
2316     Known.Zero = Known2.Zero.reverseBits();
2317     break;
2318   }
2319   case ISD::BSWAP: {
2320     SDValue Src = Op.getOperand(0);
2321 
2322     // If the only bits demanded come from one byte of the bswap result,
2323     // just shift the input byte into position to eliminate the bswap.
2324     unsigned NLZ = DemandedBits.countl_zero();
2325     unsigned NTZ = DemandedBits.countr_zero();
2326 
2327     // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
2328     // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
2329     // have 14 leading zeros, round to 8.
2330     NLZ = alignDown(NLZ, 8);
2331     NTZ = alignDown(NTZ, 8);
2332     // If we need exactly one byte, we can do this transformation.
2333     if (BitWidth - NLZ - NTZ == 8) {
2334       // Replace this with either a left or right shift to get the byte into
2335       // the right place.
2336       unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2337       if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2338         unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2339         SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2340         SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2341         return TLO.CombineTo(Op, NewOp);
2342       }
2343     }
2344 
2345     APInt DemandedSrcBits = DemandedBits.byteSwap();
2346     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2347                              Depth + 1))
2348       return true;
2349     Known.One = Known2.One.byteSwap();
2350     Known.Zero = Known2.Zero.byteSwap();
2351     break;
2352   }
2353   case ISD::CTPOP: {
2354     // If only 1 bit is demanded, replace with PARITY as long as we're before
2355     // op legalization.
2356     // FIXME: Limit to scalars for now.
2357     if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2358       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2359                                                Op.getOperand(0)));
2360 
2361     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2362     break;
2363   }
2364   case ISD::SIGN_EXTEND_INREG: {
2365     SDValue Op0 = Op.getOperand(0);
2366     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2367     unsigned ExVTBits = ExVT.getScalarSizeInBits();
2368 
2369     // If we only care about the highest bit, don't bother shifting right.
2370     if (DemandedBits.isSignMask()) {
2371       unsigned MinSignedBits =
2372           TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2373       bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2374       // However if the input is already sign extended we expect the sign
2375       // extension to be dropped altogether later and do not simplify.
2376       if (!AlreadySignExtended) {
2377         // Compute the correct shift amount type, which must be getShiftAmountTy
2378         // for scalar types after legalization.
2379         SDValue ShiftAmt =
2380             TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2381         return TLO.CombineTo(Op,
2382                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2383       }
2384     }
2385 
2386     // If none of the extended bits are demanded, eliminate the sextinreg.
2387     if (DemandedBits.getActiveBits() <= ExVTBits)
2388       return TLO.CombineTo(Op, Op0);
2389 
2390     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2391 
2392     // Since the sign extended bits are demanded, we know that the sign
2393     // bit is demanded.
2394     InputDemandedBits.setBit(ExVTBits - 1);
2395 
2396     if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2397                              Depth + 1))
2398       return true;
2399 
2400     // If the sign bit of the input is known set or clear, then we know the
2401     // top bits of the result.
2402 
2403     // If the input sign bit is known zero, convert this into a zero extension.
2404     if (Known.Zero[ExVTBits - 1])
2405       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2406 
2407     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2408     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2409       Known.One.setBitsFrom(ExVTBits);
2410       Known.Zero &= Mask;
2411     } else { // Input sign bit unknown
2412       Known.Zero &= Mask;
2413       Known.One &= Mask;
2414     }
2415     break;
2416   }
2417   case ISD::BUILD_PAIR: {
2418     EVT HalfVT = Op.getOperand(0).getValueType();
2419     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2420 
2421     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2422     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2423 
2424     KnownBits KnownLo, KnownHi;
2425 
2426     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2427       return true;
2428 
2429     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2430       return true;
2431 
2432     Known = KnownHi.concat(KnownLo);
2433     break;
2434   }
2435   case ISD::ZERO_EXTEND_VECTOR_INREG:
2436     if (VT.isScalableVector())
2437       return false;
2438     [[fallthrough]];
2439   case ISD::ZERO_EXTEND: {
2440     SDValue Src = Op.getOperand(0);
2441     EVT SrcVT = Src.getValueType();
2442     unsigned InBits = SrcVT.getScalarSizeInBits();
2443     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2444     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2445 
2446     // If none of the top bits are demanded, convert this into an any_extend.
2447     if (DemandedBits.getActiveBits() <= InBits) {
2448       // If we only need the non-extended bits of the bottom element
2449       // then we can just bitcast to the result.
2450       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2451           VT.getSizeInBits() == SrcVT.getSizeInBits())
2452         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2453 
2454       unsigned Opc =
2455           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2456       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2457         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2458     }
2459 
2460     APInt InDemandedBits = DemandedBits.trunc(InBits);
2461     APInt InDemandedElts = DemandedElts.zext(InElts);
2462     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2463                              Depth + 1)) {
2464       Op->dropFlags(SDNodeFlags::NonNeg);
2465       return true;
2466     }
2467     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2468     Known = Known.zext(BitWidth);
2469 
2470     // Attempt to avoid multi-use ops if we don't need anything from them.
2471     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2472             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2473       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2474     break;
2475   }
2476   case ISD::SIGN_EXTEND_VECTOR_INREG:
2477     if (VT.isScalableVector())
2478       return false;
2479     [[fallthrough]];
2480   case ISD::SIGN_EXTEND: {
2481     SDValue Src = Op.getOperand(0);
2482     EVT SrcVT = Src.getValueType();
2483     unsigned InBits = SrcVT.getScalarSizeInBits();
2484     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2485     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2486 
2487     APInt InDemandedElts = DemandedElts.zext(InElts);
2488     APInt InDemandedBits = DemandedBits.trunc(InBits);
2489 
2490     // Since some of the sign extended bits are demanded, we know that the sign
2491     // bit is demanded.
2492     InDemandedBits.setBit(InBits - 1);
2493 
2494     // If none of the top bits are demanded, convert this into an any_extend.
2495     if (DemandedBits.getActiveBits() <= InBits) {
2496       // If we only need the non-extended bits of the bottom element
2497       // then we can just bitcast to the result.
2498       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2499           VT.getSizeInBits() == SrcVT.getSizeInBits())
2500         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2501 
2502       // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2503       if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2504           TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2505               InBits) {
2506         unsigned Opc =
2507             IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2508         if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2509           return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2510       }
2511     }
2512 
2513     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2514                              Depth + 1))
2515       return true;
2516     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2517 
2518     // If the sign bit is known one, the top bits match.
2519     Known = Known.sext(BitWidth);
2520 
2521     // If the sign bit is known zero, convert this to a zero extend.
2522     if (Known.isNonNegative()) {
2523       unsigned Opc =
2524           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2525       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2526         SDNodeFlags Flags;
2527         if (!IsVecInReg)
2528           Flags |= SDNodeFlags::NonNeg;
2529         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2530       }
2531     }
2532 
2533     // Attempt to avoid multi-use ops if we don't need anything from them.
2534     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2535             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2536       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2537     break;
2538   }
2539   case ISD::ANY_EXTEND_VECTOR_INREG:
2540     if (VT.isScalableVector())
2541       return false;
2542     [[fallthrough]];
2543   case ISD::ANY_EXTEND: {
2544     SDValue Src = Op.getOperand(0);
2545     EVT SrcVT = Src.getValueType();
2546     unsigned InBits = SrcVT.getScalarSizeInBits();
2547     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2548     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2549 
2550     // If we only need the bottom element then we can just bitcast.
2551     // TODO: Handle ANY_EXTEND?
2552     if (IsLE && IsVecInReg && DemandedElts == 1 &&
2553         VT.getSizeInBits() == SrcVT.getSizeInBits())
2554       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2555 
2556     APInt InDemandedBits = DemandedBits.trunc(InBits);
2557     APInt InDemandedElts = DemandedElts.zext(InElts);
2558     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2559                              Depth + 1))
2560       return true;
2561     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2562     Known = Known.anyext(BitWidth);
2563 
2564     // Attempt to avoid multi-use ops if we don't need anything from them.
2565     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2566             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2567       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2568     break;
2569   }
2570   case ISD::TRUNCATE: {
2571     SDValue Src = Op.getOperand(0);
2572 
2573     // Simplify the input, using demanded bit information, and compute the known
2574     // zero/one bits live out.
2575     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2576     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2577     if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2578                              Depth + 1))
2579       return true;
2580     Known = Known.trunc(BitWidth);
2581 
2582     // Attempt to avoid multi-use ops if we don't need anything from them.
2583     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2584             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2585       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2586 
2587     // If the input is only used by this truncate, see if we can shrink it based
2588     // on the known demanded bits.
2589     switch (Src.getOpcode()) {
2590     default:
2591       break;
2592     case ISD::SRL:
2593       // Shrink SRL by a constant if none of the high bits shifted in are
2594       // demanded.
2595       if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2596         // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2597         // undesirable.
2598         break;
2599 
2600       if (Src.getNode()->hasOneUse()) {
2601         if (isTruncateFree(Src, VT) &&
2602             !isTruncateFree(Src.getValueType(), VT)) {
2603           // If truncate is only free at trunc(srl), do not turn it into
2604           // srl(trunc). The check is done by first check the truncate is free
2605           // at Src's opcode(srl), then check the truncate is not done by
2606           // referencing sub-register. In test, if both trunc(srl) and
2607           // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2608           // trunc(srl)'s trunc is free, trunc(srl) is better.
2609           break;
2610         }
2611 
2612         std::optional<uint64_t> ShAmtC =
2613             TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2614         if (!ShAmtC || *ShAmtC >= BitWidth)
2615           break;
2616         uint64_t ShVal = *ShAmtC;
2617 
2618         APInt HighBits =
2619             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2620         HighBits.lshrInPlace(ShVal);
2621         HighBits = HighBits.trunc(BitWidth);
2622         if (!(HighBits & DemandedBits)) {
2623           // None of the shifted in bits are needed.  Add a truncate of the
2624           // shift input, then shift it.
2625           SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2626           SDValue NewTrunc =
2627               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2628           return TLO.CombineTo(
2629               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2630         }
2631       }
2632       break;
2633     }
2634 
2635     break;
2636   }
2637   case ISD::AssertZext: {
2638     // AssertZext demands all of the high bits, plus any of the low bits
2639     // demanded by its users.
2640     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2641     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2642     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2643                              TLO, Depth + 1))
2644       return true;
2645 
2646     Known.Zero |= ~InMask;
2647     Known.One &= (~Known.Zero);
2648     break;
2649   }
2650   case ISD::EXTRACT_VECTOR_ELT: {
2651     SDValue Src = Op.getOperand(0);
2652     SDValue Idx = Op.getOperand(1);
2653     ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2654     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2655 
2656     if (SrcEltCnt.isScalable())
2657       return false;
2658 
2659     // Demand the bits from every vector element without a constant index.
2660     unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2661     APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2662     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2663       if (CIdx->getAPIntValue().ult(NumSrcElts))
2664         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2665 
2666     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2667     // anything about the extended bits.
2668     APInt DemandedSrcBits = DemandedBits;
2669     if (BitWidth > EltBitWidth)
2670       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2671 
2672     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2673                              Depth + 1))
2674       return true;
2675 
2676     // Attempt to avoid multi-use ops if we don't need anything from them.
2677     if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2678       if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2679               Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2680         SDValue NewOp =
2681             TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2682         return TLO.CombineTo(Op, NewOp);
2683       }
2684     }
2685 
2686     Known = Known2;
2687     if (BitWidth > EltBitWidth)
2688       Known = Known.anyext(BitWidth);
2689     break;
2690   }
2691   case ISD::BITCAST: {
2692     if (VT.isScalableVector())
2693       return false;
2694     SDValue Src = Op.getOperand(0);
2695     EVT SrcVT = Src.getValueType();
2696     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2697 
2698     // If this is an FP->Int bitcast and if the sign bit is the only
2699     // thing demanded, turn this into a FGETSIGN.
2700     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2701         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2702         SrcVT.isFloatingPoint()) {
2703       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2704       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2705       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2706           SrcVT != MVT::f128) {
2707         // Cannot eliminate/lower SHL for f128 yet.
2708         EVT Ty = OpVTLegal ? VT : MVT::i32;
2709         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2710         // place.  We expect the SHL to be eliminated by other optimizations.
2711         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2712         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2713         if (!OpVTLegal && OpVTSizeInBits > 32)
2714           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2715         unsigned ShVal = Op.getValueSizeInBits() - 1;
2716         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2717         return TLO.CombineTo(Op,
2718                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2719       }
2720     }
2721 
2722     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2723     // Demand the elt/bit if any of the original elts/bits are demanded.
2724     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2725       unsigned Scale = BitWidth / NumSrcEltBits;
2726       unsigned NumSrcElts = SrcVT.getVectorNumElements();
2727       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2728       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2729       for (unsigned i = 0; i != Scale; ++i) {
2730         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2731         unsigned BitOffset = EltOffset * NumSrcEltBits;
2732         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2733         if (!Sub.isZero()) {
2734           DemandedSrcBits |= Sub;
2735           for (unsigned j = 0; j != NumElts; ++j)
2736             if (DemandedElts[j])
2737               DemandedSrcElts.setBit((j * Scale) + i);
2738         }
2739       }
2740 
2741       APInt KnownSrcUndef, KnownSrcZero;
2742       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2743                                      KnownSrcZero, TLO, Depth + 1))
2744         return true;
2745 
2746       KnownBits KnownSrcBits;
2747       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2748                                KnownSrcBits, TLO, Depth + 1))
2749         return true;
2750     } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2751       // TODO - bigendian once we have test coverage.
2752       unsigned Scale = NumSrcEltBits / BitWidth;
2753       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2754       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2755       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2756       for (unsigned i = 0; i != NumElts; ++i)
2757         if (DemandedElts[i]) {
2758           unsigned Offset = (i % Scale) * BitWidth;
2759           DemandedSrcBits.insertBits(DemandedBits, Offset);
2760           DemandedSrcElts.setBit(i / Scale);
2761         }
2762 
2763       if (SrcVT.isVector()) {
2764         APInt KnownSrcUndef, KnownSrcZero;
2765         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2766                                        KnownSrcZero, TLO, Depth + 1))
2767           return true;
2768       }
2769 
2770       KnownBits KnownSrcBits;
2771       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2772                                KnownSrcBits, TLO, Depth + 1))
2773         return true;
2774 
2775       // Attempt to avoid multi-use ops if we don't need anything from them.
2776       if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2777         if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2778                 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2779           SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2780           return TLO.CombineTo(Op, NewOp);
2781         }
2782       }
2783     }
2784 
2785     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2786     // recursive call where Known may be useful to the caller.
2787     if (Depth > 0) {
2788       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2789       return false;
2790     }
2791     break;
2792   }
2793   case ISD::MUL:
2794     if (DemandedBits.isPowerOf2()) {
2795       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2796       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2797       // odd (has LSB set), then the left-shifted low bit of X is the answer.
2798       unsigned CTZ = DemandedBits.countr_zero();
2799       ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2800       if (C && C->getAPIntValue().countr_zero() == CTZ) {
2801         SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2802         SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2803         return TLO.CombineTo(Op, Shl);
2804       }
2805     }
2806     // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2807     // X * X is odd iff X is odd.
2808     // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2809     if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2810       SDValue One = TLO.DAG.getConstant(1, dl, VT);
2811       SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2812       return TLO.CombineTo(Op, And1);
2813     }
2814     [[fallthrough]];
2815   case ISD::ADD:
2816   case ISD::SUB: {
2817     // Add, Sub, and Mul don't demand any bits in positions beyond that
2818     // of the highest bit demanded of them.
2819     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2820     SDNodeFlags Flags = Op.getNode()->getFlags();
2821     unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2822     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2823     KnownBits KnownOp0, KnownOp1;
2824     auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2825                                       const KnownBits &KnownRHS) {
2826       if (Op.getOpcode() == ISD::MUL)
2827         Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2828       return Demanded;
2829     };
2830     if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2831                              Depth + 1) ||
2832         SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2833                              DemandedElts, KnownOp0, TLO, Depth + 1) ||
2834         // See if the operation should be performed at a smaller bit width.
2835         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2836       // Disable the nsw and nuw flags. We can no longer guarantee that we
2837       // won't wrap after simplification.
2838       Op->dropFlags(SDNodeFlags::NoWrap);
2839       return true;
2840     }
2841 
2842     // neg x with only low bit demanded is simply x.
2843     if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2844         isNullConstant(Op0))
2845       return TLO.CombineTo(Op, Op1);
2846 
2847     // Attempt to avoid multi-use ops if we don't need anything from them.
2848     if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2849       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2850           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2851       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2852           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2853       if (DemandedOp0 || DemandedOp1) {
2854         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2855         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2856         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2857                                         Flags & ~SDNodeFlags::NoWrap);
2858         return TLO.CombineTo(Op, NewOp);
2859       }
2860     }
2861 
2862     // If we have a constant operand, we may be able to turn it into -1 if we
2863     // do not demand the high bits. This can make the constant smaller to
2864     // encode, allow more general folding, or match specialized instruction
2865     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2866     // is probably not useful (and could be detrimental).
2867     ConstantSDNode *C = isConstOrConstSplat(Op1);
2868     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2869     if (C && !C->isAllOnes() && !C->isOne() &&
2870         (C->getAPIntValue() | HighMask).isAllOnes()) {
2871       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2872       // Disable the nsw and nuw flags. We can no longer guarantee that we
2873       // won't wrap after simplification.
2874       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2875                                       Flags & ~SDNodeFlags::NoWrap);
2876       return TLO.CombineTo(Op, NewOp);
2877     }
2878 
2879     // Match a multiply with a disguised negated-power-of-2 and convert to a
2880     // an equivalent shift-left amount.
2881     // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2882     auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2883       if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2884         return 0;
2885 
2886       // Don't touch opaque constants. Also, ignore zero and power-of-2
2887       // multiplies. Those will get folded later.
2888       ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2889       if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2890           !MulC->getAPIntValue().isPowerOf2()) {
2891         APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2892         if (UnmaskedC.isNegatedPowerOf2())
2893           return (-UnmaskedC).logBase2();
2894       }
2895       return 0;
2896     };
2897 
2898     auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2899                        unsigned ShlAmt) {
2900       SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2901       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2902       SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2903       return TLO.CombineTo(Op, Res);
2904     };
2905 
2906     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2907       if (Op.getOpcode() == ISD::ADD) {
2908         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2909         if (unsigned ShAmt = getShiftLeftAmt(Op0))
2910           return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2911         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2912         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2913           return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2914       }
2915       if (Op.getOpcode() == ISD::SUB) {
2916         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2917         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2918           return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2919       }
2920     }
2921 
2922     if (Op.getOpcode() == ISD::MUL) {
2923       Known = KnownBits::mul(KnownOp0, KnownOp1);
2924     } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2925       Known = KnownBits::computeForAddSub(
2926           Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2927           Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2928     }
2929     break;
2930   }
2931   default:
2932     // We also ask the target about intrinsics (which could be specific to it).
2933     if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2934         Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2935       // TODO: Probably okay to remove after audit; here to reduce change size
2936       // in initial enablement patch for scalable vectors
2937       if (Op.getValueType().isScalableVector())
2938         break;
2939       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2940                                             Known, TLO, Depth))
2941         return true;
2942       break;
2943     }
2944 
2945     // Just use computeKnownBits to compute output bits.
2946     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2947     break;
2948   }
2949 
2950   // If we know the value of all of the demanded bits, return this as a
2951   // constant.
2952   if (!isTargetCanonicalConstantNode(Op) &&
2953       DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2954     // Avoid folding to a constant if any OpaqueConstant is involved.
2955     const SDNode *N = Op.getNode();
2956     for (SDNode *Op :
2957          llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2958       if (auto *C = dyn_cast<ConstantSDNode>(Op))
2959         if (C->isOpaque())
2960           return false;
2961     }
2962     if (VT.isInteger())
2963       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2964     if (VT.isFloatingPoint())
2965       return TLO.CombineTo(
2966           Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2967                                     dl, VT));
2968   }
2969 
2970   // A multi use 'all demanded elts' simplify failed to find any knownbits.
2971   // Try again just for the original demanded elts.
2972   // Ensure we do this AFTER constant folding above.
2973   if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2974     Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2975 
2976   return false;
2977 }
2978 
2979 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2980                                                 const APInt &DemandedElts,
2981                                                 DAGCombinerInfo &DCI) const {
2982   SelectionDAG &DAG = DCI.DAG;
2983   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2984                         !DCI.isBeforeLegalizeOps());
2985 
2986   APInt KnownUndef, KnownZero;
2987   bool Simplified =
2988       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2989   if (Simplified) {
2990     DCI.AddToWorklist(Op.getNode());
2991     DCI.CommitTargetLoweringOpt(TLO);
2992   }
2993 
2994   return Simplified;
2995 }
2996 
2997 /// Given a vector binary operation and known undefined elements for each input
2998 /// operand, compute whether each element of the output is undefined.
2999 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3000                                          const APInt &UndefOp0,
3001                                          const APInt &UndefOp1) {
3002   EVT VT = BO.getValueType();
3003   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3004          "Vector binop only");
3005 
3006   EVT EltVT = VT.getVectorElementType();
3007   unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3008   assert(UndefOp0.getBitWidth() == NumElts &&
3009          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3010 
3011   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3012                                    const APInt &UndefVals) {
3013     if (UndefVals[Index])
3014       return DAG.getUNDEF(EltVT);
3015 
3016     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3017       // Try hard to make sure that the getNode() call is not creating temporary
3018       // nodes. Ignore opaque integers because they do not constant fold.
3019       SDValue Elt = BV->getOperand(Index);
3020       auto *C = dyn_cast<ConstantSDNode>(Elt);
3021       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3022         return Elt;
3023     }
3024 
3025     return SDValue();
3026   };
3027 
3028   APInt KnownUndef = APInt::getZero(NumElts);
3029   for (unsigned i = 0; i != NumElts; ++i) {
3030     // If both inputs for this element are either constant or undef and match
3031     // the element type, compute the constant/undef result for this element of
3032     // the vector.
3033     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3034     // not handle FP constants. The code within getNode() should be refactored
3035     // to avoid the danger of creating a bogus temporary node here.
3036     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3037     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3038     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3039       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3040         KnownUndef.setBit(i);
3041   }
3042   return KnownUndef;
3043 }
3044 
3045 bool TargetLowering::SimplifyDemandedVectorElts(
3046     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3047     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3048     bool AssumeSingleUse) const {
3049   EVT VT = Op.getValueType();
3050   unsigned Opcode = Op.getOpcode();
3051   APInt DemandedElts = OriginalDemandedElts;
3052   unsigned NumElts = DemandedElts.getBitWidth();
3053   assert(VT.isVector() && "Expected vector op");
3054 
3055   KnownUndef = KnownZero = APInt::getZero(NumElts);
3056 
3057   if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3058     return false;
3059 
3060   // TODO: For now we assume we know nothing about scalable vectors.
3061   if (VT.isScalableVector())
3062     return false;
3063 
3064   assert(VT.getVectorNumElements() == NumElts &&
3065          "Mask size mismatches value type element count!");
3066 
3067   // Undef operand.
3068   if (Op.isUndef()) {
3069     KnownUndef.setAllBits();
3070     return false;
3071   }
3072 
3073   // If Op has other users, assume that all elements are needed.
3074   if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3075     DemandedElts.setAllBits();
3076 
3077   // Not demanding any elements from Op.
3078   if (DemandedElts == 0) {
3079     KnownUndef.setAllBits();
3080     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3081   }
3082 
3083   // Limit search depth.
3084   if (Depth >= SelectionDAG::MaxRecursionDepth)
3085     return false;
3086 
3087   SDLoc DL(Op);
3088   unsigned EltSizeInBits = VT.getScalarSizeInBits();
3089   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3090 
3091   // Helper for demanding the specified elements and all the bits of both binary
3092   // operands.
3093   auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3094     SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3095                                                            TLO.DAG, Depth + 1);
3096     SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3097                                                            TLO.DAG, Depth + 1);
3098     if (NewOp0 || NewOp1) {
3099       SDValue NewOp =
3100           TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3101                           NewOp1 ? NewOp1 : Op1, Op->getFlags());
3102       return TLO.CombineTo(Op, NewOp);
3103     }
3104     return false;
3105   };
3106 
3107   switch (Opcode) {
3108   case ISD::SCALAR_TO_VECTOR: {
3109     if (!DemandedElts[0]) {
3110       KnownUndef.setAllBits();
3111       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3112     }
3113     SDValue ScalarSrc = Op.getOperand(0);
3114     if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3115       SDValue Src = ScalarSrc.getOperand(0);
3116       SDValue Idx = ScalarSrc.getOperand(1);
3117       EVT SrcVT = Src.getValueType();
3118 
3119       ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3120 
3121       if (SrcEltCnt.isScalable())
3122         return false;
3123 
3124       unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3125       if (isNullConstant(Idx)) {
3126         APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3127         APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3128         APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3129         if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3130                                        TLO, Depth + 1))
3131           return true;
3132       }
3133     }
3134     KnownUndef.setHighBits(NumElts - 1);
3135     break;
3136   }
3137   case ISD::BITCAST: {
3138     SDValue Src = Op.getOperand(0);
3139     EVT SrcVT = Src.getValueType();
3140 
3141     // We only handle vectors here.
3142     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3143     if (!SrcVT.isVector())
3144       break;
3145 
3146     // Fast handling of 'identity' bitcasts.
3147     unsigned NumSrcElts = SrcVT.getVectorNumElements();
3148     if (NumSrcElts == NumElts)
3149       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3150                                         KnownZero, TLO, Depth + 1);
3151 
3152     APInt SrcDemandedElts, SrcZero, SrcUndef;
3153 
3154     // Bitcast from 'large element' src vector to 'small element' vector, we
3155     // must demand a source element if any DemandedElt maps to it.
3156     if ((NumElts % NumSrcElts) == 0) {
3157       unsigned Scale = NumElts / NumSrcElts;
3158       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3159       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3160                                      TLO, Depth + 1))
3161         return true;
3162 
3163       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3164       // of the large element.
3165       // TODO - bigendian once we have test coverage.
3166       if (IsLE) {
3167         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3168         APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3169         for (unsigned i = 0; i != NumElts; ++i)
3170           if (DemandedElts[i]) {
3171             unsigned Ofs = (i % Scale) * EltSizeInBits;
3172             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3173           }
3174 
3175         KnownBits Known;
3176         if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3177                                  TLO, Depth + 1))
3178           return true;
3179 
3180         // The bitcast has split each wide element into a number of
3181         // narrow subelements. We have just computed the Known bits
3182         // for wide elements. See if element splitting results in
3183         // some subelements being zero. Only for demanded elements!
3184         for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3185           if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3186                    .isAllOnes())
3187             continue;
3188           for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3189             unsigned Elt = Scale * SrcElt + SubElt;
3190             if (DemandedElts[Elt])
3191               KnownZero.setBit(Elt);
3192           }
3193         }
3194       }
3195 
3196       // If the src element is zero/undef then all the output elements will be -
3197       // only demanded elements are guaranteed to be correct.
3198       for (unsigned i = 0; i != NumSrcElts; ++i) {
3199         if (SrcDemandedElts[i]) {
3200           if (SrcZero[i])
3201             KnownZero.setBits(i * Scale, (i + 1) * Scale);
3202           if (SrcUndef[i])
3203             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3204         }
3205       }
3206     }
3207 
3208     // Bitcast from 'small element' src vector to 'large element' vector, we
3209     // demand all smaller source elements covered by the larger demanded element
3210     // of this vector.
3211     if ((NumSrcElts % NumElts) == 0) {
3212       unsigned Scale = NumSrcElts / NumElts;
3213       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3214       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3215                                      TLO, Depth + 1))
3216         return true;
3217 
3218       // If all the src elements covering an output element are zero/undef, then
3219       // the output element will be as well, assuming it was demanded.
3220       for (unsigned i = 0; i != NumElts; ++i) {
3221         if (DemandedElts[i]) {
3222           if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3223             KnownZero.setBit(i);
3224           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3225             KnownUndef.setBit(i);
3226         }
3227       }
3228     }
3229     break;
3230   }
3231   case ISD::FREEZE: {
3232     SDValue N0 = Op.getOperand(0);
3233     if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3234                                                  /*PoisonOnly=*/false))
3235       return TLO.CombineTo(Op, N0);
3236 
3237     // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3238     // freeze(op(x, ...)) -> op(freeze(x), ...).
3239     if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3240       return TLO.CombineTo(
3241           Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3242                               TLO.DAG.getFreeze(N0.getOperand(0))));
3243     break;
3244   }
3245   case ISD::BUILD_VECTOR: {
3246     // Check all elements and simplify any unused elements with UNDEF.
3247     if (!DemandedElts.isAllOnes()) {
3248       // Don't simplify BROADCASTS.
3249       if (llvm::any_of(Op->op_values(),
3250                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3251         SmallVector<SDValue, 32> Ops(Op->ops());
3252         bool Updated = false;
3253         for (unsigned i = 0; i != NumElts; ++i) {
3254           if (!DemandedElts[i] && !Ops[i].isUndef()) {
3255             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3256             KnownUndef.setBit(i);
3257             Updated = true;
3258           }
3259         }
3260         if (Updated)
3261           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3262       }
3263     }
3264     for (unsigned i = 0; i != NumElts; ++i) {
3265       SDValue SrcOp = Op.getOperand(i);
3266       if (SrcOp.isUndef()) {
3267         KnownUndef.setBit(i);
3268       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3269                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3270         KnownZero.setBit(i);
3271       }
3272     }
3273     break;
3274   }
3275   case ISD::CONCAT_VECTORS: {
3276     EVT SubVT = Op.getOperand(0).getValueType();
3277     unsigned NumSubVecs = Op.getNumOperands();
3278     unsigned NumSubElts = SubVT.getVectorNumElements();
3279     for (unsigned i = 0; i != NumSubVecs; ++i) {
3280       SDValue SubOp = Op.getOperand(i);
3281       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3282       APInt SubUndef, SubZero;
3283       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3284                                      Depth + 1))
3285         return true;
3286       KnownUndef.insertBits(SubUndef, i * NumSubElts);
3287       KnownZero.insertBits(SubZero, i * NumSubElts);
3288     }
3289 
3290     // Attempt to avoid multi-use ops if we don't need anything from them.
3291     if (!DemandedElts.isAllOnes()) {
3292       bool FoundNewSub = false;
3293       SmallVector<SDValue, 2> DemandedSubOps;
3294       for (unsigned i = 0; i != NumSubVecs; ++i) {
3295         SDValue SubOp = Op.getOperand(i);
3296         APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3297         SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3298             SubOp, SubElts, TLO.DAG, Depth + 1);
3299         DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3300         FoundNewSub = NewSubOp ? true : FoundNewSub;
3301       }
3302       if (FoundNewSub) {
3303         SDValue NewOp =
3304             TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3305         return TLO.CombineTo(Op, NewOp);
3306       }
3307     }
3308     break;
3309   }
3310   case ISD::INSERT_SUBVECTOR: {
3311     // Demand any elements from the subvector and the remainder from the src its
3312     // inserted into.
3313     SDValue Src = Op.getOperand(0);
3314     SDValue Sub = Op.getOperand(1);
3315     uint64_t Idx = Op.getConstantOperandVal(2);
3316     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3317     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3318     APInt DemandedSrcElts = DemandedElts;
3319     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3320 
3321     APInt SubUndef, SubZero;
3322     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3323                                    Depth + 1))
3324       return true;
3325 
3326     // If none of the src operand elements are demanded, replace it with undef.
3327     if (!DemandedSrcElts && !Src.isUndef())
3328       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3329                                                TLO.DAG.getUNDEF(VT), Sub,
3330                                                Op.getOperand(2)));
3331 
3332     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3333                                    TLO, Depth + 1))
3334       return true;
3335     KnownUndef.insertBits(SubUndef, Idx);
3336     KnownZero.insertBits(SubZero, Idx);
3337 
3338     // Attempt to avoid multi-use ops if we don't need anything from them.
3339     if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3340       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3341           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3342       SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3343           Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3344       if (NewSrc || NewSub) {
3345         NewSrc = NewSrc ? NewSrc : Src;
3346         NewSub = NewSub ? NewSub : Sub;
3347         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3348                                         NewSub, Op.getOperand(2));
3349         return TLO.CombineTo(Op, NewOp);
3350       }
3351     }
3352     break;
3353   }
3354   case ISD::EXTRACT_SUBVECTOR: {
3355     // Offset the demanded elts by the subvector index.
3356     SDValue Src = Op.getOperand(0);
3357     if (Src.getValueType().isScalableVector())
3358       break;
3359     uint64_t Idx = Op.getConstantOperandVal(1);
3360     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3361     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3362 
3363     APInt SrcUndef, SrcZero;
3364     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3365                                    Depth + 1))
3366       return true;
3367     KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3368     KnownZero = SrcZero.extractBits(NumElts, Idx);
3369 
3370     // Attempt to avoid multi-use ops if we don't need anything from them.
3371     if (!DemandedElts.isAllOnes()) {
3372       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3373           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3374       if (NewSrc) {
3375         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3376                                         Op.getOperand(1));
3377         return TLO.CombineTo(Op, NewOp);
3378       }
3379     }
3380     break;
3381   }
3382   case ISD::INSERT_VECTOR_ELT: {
3383     SDValue Vec = Op.getOperand(0);
3384     SDValue Scl = Op.getOperand(1);
3385     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3386 
3387     // For a legal, constant insertion index, if we don't need this insertion
3388     // then strip it, else remove it from the demanded elts.
3389     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3390       unsigned Idx = CIdx->getZExtValue();
3391       if (!DemandedElts[Idx])
3392         return TLO.CombineTo(Op, Vec);
3393 
3394       APInt DemandedVecElts(DemandedElts);
3395       DemandedVecElts.clearBit(Idx);
3396       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3397                                      KnownZero, TLO, Depth + 1))
3398         return true;
3399 
3400       KnownUndef.setBitVal(Idx, Scl.isUndef());
3401 
3402       KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3403       break;
3404     }
3405 
3406     APInt VecUndef, VecZero;
3407     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3408                                    Depth + 1))
3409       return true;
3410     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3411     break;
3412   }
3413   case ISD::VSELECT: {
3414     SDValue Sel = Op.getOperand(0);
3415     SDValue LHS = Op.getOperand(1);
3416     SDValue RHS = Op.getOperand(2);
3417 
3418     // Try to transform the select condition based on the current demanded
3419     // elements.
3420     APInt UndefSel, ZeroSel;
3421     if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3422                                    Depth + 1))
3423       return true;
3424 
3425     // See if we can simplify either vselect operand.
3426     APInt DemandedLHS(DemandedElts);
3427     APInt DemandedRHS(DemandedElts);
3428     APInt UndefLHS, ZeroLHS;
3429     APInt UndefRHS, ZeroRHS;
3430     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3431                                    Depth + 1))
3432       return true;
3433     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3434                                    Depth + 1))
3435       return true;
3436 
3437     KnownUndef = UndefLHS & UndefRHS;
3438     KnownZero = ZeroLHS & ZeroRHS;
3439 
3440     // If we know that the selected element is always zero, we don't need the
3441     // select value element.
3442     APInt DemandedSel = DemandedElts & ~KnownZero;
3443     if (DemandedSel != DemandedElts)
3444       if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3445                                      Depth + 1))
3446         return true;
3447 
3448     break;
3449   }
3450   case ISD::VECTOR_SHUFFLE: {
3451     SDValue LHS = Op.getOperand(0);
3452     SDValue RHS = Op.getOperand(1);
3453     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3454 
3455     // Collect demanded elements from shuffle operands..
3456     APInt DemandedLHS(NumElts, 0);
3457     APInt DemandedRHS(NumElts, 0);
3458     for (unsigned i = 0; i != NumElts; ++i) {
3459       int M = ShuffleMask[i];
3460       if (M < 0 || !DemandedElts[i])
3461         continue;
3462       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3463       if (M < (int)NumElts)
3464         DemandedLHS.setBit(M);
3465       else
3466         DemandedRHS.setBit(M - NumElts);
3467     }
3468 
3469     // See if we can simplify either shuffle operand.
3470     APInt UndefLHS, ZeroLHS;
3471     APInt UndefRHS, ZeroRHS;
3472     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3473                                    Depth + 1))
3474       return true;
3475     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3476                                    Depth + 1))
3477       return true;
3478 
3479     // Simplify mask using undef elements from LHS/RHS.
3480     bool Updated = false;
3481     bool IdentityLHS = true, IdentityRHS = true;
3482     SmallVector<int, 32> NewMask(ShuffleMask);
3483     for (unsigned i = 0; i != NumElts; ++i) {
3484       int &M = NewMask[i];
3485       if (M < 0)
3486         continue;
3487       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3488           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3489         Updated = true;
3490         M = -1;
3491       }
3492       IdentityLHS &= (M < 0) || (M == (int)i);
3493       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3494     }
3495 
3496     // Update legal shuffle masks based on demanded elements if it won't reduce
3497     // to Identity which can cause premature removal of the shuffle mask.
3498     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3499       SDValue LegalShuffle =
3500           buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3501       if (LegalShuffle)
3502         return TLO.CombineTo(Op, LegalShuffle);
3503     }
3504 
3505     // Propagate undef/zero elements from LHS/RHS.
3506     for (unsigned i = 0; i != NumElts; ++i) {
3507       int M = ShuffleMask[i];
3508       if (M < 0) {
3509         KnownUndef.setBit(i);
3510       } else if (M < (int)NumElts) {
3511         if (UndefLHS[M])
3512           KnownUndef.setBit(i);
3513         if (ZeroLHS[M])
3514           KnownZero.setBit(i);
3515       } else {
3516         if (UndefRHS[M - NumElts])
3517           KnownUndef.setBit(i);
3518         if (ZeroRHS[M - NumElts])
3519           KnownZero.setBit(i);
3520       }
3521     }
3522     break;
3523   }
3524   case ISD::ANY_EXTEND_VECTOR_INREG:
3525   case ISD::SIGN_EXTEND_VECTOR_INREG:
3526   case ISD::ZERO_EXTEND_VECTOR_INREG: {
3527     APInt SrcUndef, SrcZero;
3528     SDValue Src = Op.getOperand(0);
3529     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3530     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3531     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3532                                    Depth + 1))
3533       return true;
3534     KnownZero = SrcZero.zextOrTrunc(NumElts);
3535     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3536 
3537     if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3538         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3539         DemandedSrcElts == 1) {
3540       // aext - if we just need the bottom element then we can bitcast.
3541       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3542     }
3543 
3544     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3545       // zext(undef) upper bits are guaranteed to be zero.
3546       if (DemandedElts.isSubsetOf(KnownUndef))
3547         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3548       KnownUndef.clearAllBits();
3549 
3550       // zext - if we just need the bottom element then we can mask:
3551       // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3552       if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3553           Op->isOnlyUserOf(Src.getNode()) &&
3554           Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3555         SDLoc DL(Op);
3556         EVT SrcVT = Src.getValueType();
3557         EVT SrcSVT = SrcVT.getScalarType();
3558         SmallVector<SDValue> MaskElts;
3559         MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3560         MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3561         SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3562         if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3563                 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3564           Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3565           return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3566         }
3567       }
3568     }
3569     break;
3570   }
3571 
3572   // TODO: There are more binop opcodes that could be handled here - MIN,
3573   // MAX, saturated math, etc.
3574   case ISD::ADD: {
3575     SDValue Op0 = Op.getOperand(0);
3576     SDValue Op1 = Op.getOperand(1);
3577     if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3578       APInt UndefLHS, ZeroLHS;
3579       if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3580                                      Depth + 1, /*AssumeSingleUse*/ true))
3581         return true;
3582     }
3583     [[fallthrough]];
3584   }
3585   case ISD::AVGCEILS:
3586   case ISD::AVGCEILU:
3587   case ISD::AVGFLOORS:
3588   case ISD::AVGFLOORU:
3589   case ISD::OR:
3590   case ISD::XOR:
3591   case ISD::SUB:
3592   case ISD::FADD:
3593   case ISD::FSUB:
3594   case ISD::FMUL:
3595   case ISD::FDIV:
3596   case ISD::FREM: {
3597     SDValue Op0 = Op.getOperand(0);
3598     SDValue Op1 = Op.getOperand(1);
3599 
3600     APInt UndefRHS, ZeroRHS;
3601     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3602                                    Depth + 1))
3603       return true;
3604     APInt UndefLHS, ZeroLHS;
3605     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3606                                    Depth + 1))
3607       return true;
3608 
3609     KnownZero = ZeroLHS & ZeroRHS;
3610     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3611 
3612     // Attempt to avoid multi-use ops if we don't need anything from them.
3613     // TODO - use KnownUndef to relax the demandedelts?
3614     if (!DemandedElts.isAllOnes())
3615       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3616         return true;
3617     break;
3618   }
3619   case ISD::SHL:
3620   case ISD::SRL:
3621   case ISD::SRA:
3622   case ISD::ROTL:
3623   case ISD::ROTR: {
3624     SDValue Op0 = Op.getOperand(0);
3625     SDValue Op1 = Op.getOperand(1);
3626 
3627     APInt UndefRHS, ZeroRHS;
3628     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3629                                    Depth + 1))
3630       return true;
3631     APInt UndefLHS, ZeroLHS;
3632     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3633                                    Depth + 1))
3634       return true;
3635 
3636     KnownZero = ZeroLHS;
3637     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3638 
3639     // Attempt to avoid multi-use ops if we don't need anything from them.
3640     // TODO - use KnownUndef to relax the demandedelts?
3641     if (!DemandedElts.isAllOnes())
3642       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3643         return true;
3644     break;
3645   }
3646   case ISD::MUL:
3647   case ISD::MULHU:
3648   case ISD::MULHS:
3649   case ISD::AND: {
3650     SDValue Op0 = Op.getOperand(0);
3651     SDValue Op1 = Op.getOperand(1);
3652 
3653     APInt SrcUndef, SrcZero;
3654     if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3655                                    Depth + 1))
3656       return true;
3657     // If we know that a demanded element was zero in Op1 we don't need to
3658     // demand it in Op0 - its guaranteed to be zero.
3659     APInt DemandedElts0 = DemandedElts & ~SrcZero;
3660     if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3661                                    TLO, Depth + 1))
3662       return true;
3663 
3664     KnownUndef &= DemandedElts0;
3665     KnownZero &= DemandedElts0;
3666 
3667     // If every element pair has a zero/undef then just fold to zero.
3668     // fold (and x, undef) -> 0  /  (and x, 0) -> 0
3669     // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
3670     if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3671       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3672 
3673     // If either side has a zero element, then the result element is zero, even
3674     // if the other is an UNDEF.
3675     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3676     // and then handle 'and' nodes with the rest of the binop opcodes.
3677     KnownZero |= SrcZero;
3678     KnownUndef &= SrcUndef;
3679     KnownUndef &= ~KnownZero;
3680 
3681     // Attempt to avoid multi-use ops if we don't need anything from them.
3682     if (!DemandedElts.isAllOnes())
3683       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3684         return true;
3685     break;
3686   }
3687   case ISD::TRUNCATE:
3688   case ISD::SIGN_EXTEND:
3689   case ISD::ZERO_EXTEND:
3690     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3691                                    KnownZero, TLO, Depth + 1))
3692       return true;
3693 
3694     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3695       // zext(undef) upper bits are guaranteed to be zero.
3696       if (DemandedElts.isSubsetOf(KnownUndef))
3697         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3698       KnownUndef.clearAllBits();
3699     }
3700     break;
3701   default: {
3702     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3703       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3704                                                   KnownZero, TLO, Depth))
3705         return true;
3706     } else {
3707       KnownBits Known;
3708       APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3709       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3710                                TLO, Depth, AssumeSingleUse))
3711         return true;
3712     }
3713     break;
3714   }
3715   }
3716   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3717 
3718   // Constant fold all undef cases.
3719   // TODO: Handle zero cases as well.
3720   if (DemandedElts.isSubsetOf(KnownUndef))
3721     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3722 
3723   return false;
3724 }
3725 
3726 /// Determine which of the bits specified in Mask are known to be either zero or
3727 /// one and return them in the Known.
3728 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3729                                                    KnownBits &Known,
3730                                                    const APInt &DemandedElts,
3731                                                    const SelectionDAG &DAG,
3732                                                    unsigned Depth) const {
3733   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3734           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3735           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3736           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3737          "Should use MaskedValueIsZero if you don't know whether Op"
3738          " is a target node!");
3739   Known.resetAll();
3740 }
3741 
/// GlobalISel hook: compute known bits for a target-specific instruction
/// defining register \p R. The base implementation reports that nothing is
/// known; targets with custom instructions should override it.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default: no bit of R is known zero or one.
  Known.resetAll();
}
3748 
3749 void TargetLowering::computeKnownBitsForFrameIndex(
3750   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3751   // The low bits are known zero if the pointer is aligned.
3752   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3753 }
3754 
/// GlobalISel hook: compute a known alignment for the value in register \p R
/// defined by a target-specific instruction. The base implementation
/// conservatively claims no alignment knowledge.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  // Align(1) == "no known alignment".
  return Align(1);
}
3760 
3761 /// This method can be implemented by targets that want to expose additional
3762 /// information about sign bits to the DAG Combiner.
3763 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3764                                                          const APInt &,
3765                                                          const SelectionDAG &,
3766                                                          unsigned Depth) const {
3767   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3768           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3769           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3770           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3771          "Should use ComputeNumSignBits if you don't know whether Op"
3772          " is a target node!");
3773   return 1;
3774 }
3775 
/// GlobalISel hook: report the number of known sign bits for the value in
/// register \p R defined by a target-specific instruction. The base
/// implementation returns 1, i.e. only the sign bit itself is guaranteed.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
3781 
3782 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3783     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3784     TargetLoweringOpt &TLO, unsigned Depth) const {
3785   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3786           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3787           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3788           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3789          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3790          " is a target node!");
3791   return false;
3792 }
3793 
3794 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3795     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3796     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3797   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3798           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3799           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3800           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3801          "Should use SimplifyDemandedBits if you don't know whether Op"
3802          " is a target node!");
3803   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3804   return false;
3805 }
3806 
3807 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3808     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3809     SelectionDAG &DAG, unsigned Depth) const {
3810   assert(
3811       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3812        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3813        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3814        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3815       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3816       " is a target node!");
3817   return SDValue();
3818 }
3819 
3820 SDValue
3821 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3822                                         SDValue N1, MutableArrayRef<int> Mask,
3823                                         SelectionDAG &DAG) const {
3824   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3825   if (!LegalMask) {
3826     std::swap(N0, N1);
3827     ShuffleVectorSDNode::commuteMask(Mask);
3828     LegalMask = isShuffleMaskLegal(Mask, VT);
3829   }
3830 
3831   if (!LegalMask)
3832     return SDValue();
3833 
3834   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3835 }
3836 
/// Targets can override this to return the constant value a target-specific
/// load node is known to produce (e.g. a constant-pool load). The base
/// implementation recognizes no such loads.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3840 
3841 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3842     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3843     bool PoisonOnly, unsigned Depth) const {
3844   assert(
3845       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3846        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3847        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3848        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3849       "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3850       " is a target node!");
3851 
3852   // If Op can't create undef/poison and none of its operands are undef/poison
3853   // then Op is never undef/poison.
3854   return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3855                                               /*ConsiderFlags*/ true, Depth) &&
3856          all_of(Op->ops(), [&](SDValue V) {
3857            return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3858                                                        Depth + 1);
3859          });
3860 }
3861 
3862 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3863     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3864     bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3865   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3866           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3867           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3868           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3869          "Should use canCreateUndefOrPoison if you don't know whether Op"
3870          " is a target node!");
3871   // Be conservative and return true.
3872   return true;
3873 }
3874 
3875 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3876                                                   const SelectionDAG &DAG,
3877                                                   bool SNaN,
3878                                                   unsigned Depth) const {
3879   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3880           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3881           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3882           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3883          "Should use isKnownNeverNaN if you don't know whether Op"
3884          " is a target node!");
3885   return false;
3886 }
3887 
3888 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3889                                                const APInt &DemandedElts,
3890                                                APInt &UndefElts,
3891                                                const SelectionDAG &DAG,
3892                                                unsigned Depth) const {
3893   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3894           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3895           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3896           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3897          "Should use isSplatValue if you don't know whether Op"
3898          " is a target node!");
3899   return false;
3900 }
3901 
3902 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3903 // work with truncating build vectors and vectors with elements of less than
3904 // 8 bits.
3905 bool TargetLowering::isConstTrueVal(SDValue N) const {
3906   if (!N)
3907     return false;
3908 
3909   unsigned EltWidth;
3910   APInt CVal;
3911   if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3912                                                /*AllowTruncation=*/true)) {
3913     CVal = CN->getAPIntValue();
3914     EltWidth = N.getValueType().getScalarSizeInBits();
3915   } else
3916     return false;
3917 
3918   // If this is a truncating splat, truncate the splat value.
3919   // Otherwise, we may fail to match the expected values below.
3920   if (EltWidth < CVal.getBitWidth())
3921     CVal = CVal.trunc(EltWidth);
3922 
3923   switch (getBooleanContents(N.getValueType())) {
3924   case UndefinedBooleanContent:
3925     return CVal[0];
3926   case ZeroOrOneBooleanContent:
3927     return CVal.isOne();
3928   case ZeroOrNegativeOneBooleanContent:
3929     return CVal.isAllOnes();
3930   }
3931 
3932   llvm_unreachable("Invalid boolean contents");
3933 }
3934 
3935 bool TargetLowering::isConstFalseVal(SDValue N) const {
3936   if (!N)
3937     return false;
3938 
3939   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3940   if (!CN) {
3941     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3942     if (!BV)
3943       return false;
3944 
3945     // Only interested in constant splats, we don't care about undef
3946     // elements in identifying boolean constants and getConstantSplatNode
3947     // returns NULL if all ops are undef;
3948     CN = BV->getConstantSplatNode();
3949     if (!CN)
3950       return false;
3951   }
3952 
3953   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3954     return !CN->getAPIntValue()[0];
3955 
3956   return CN->isZero();
3957 }
3958 
3959 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3960                                        bool SExt) const {
3961   if (VT == MVT::i1)
3962     return N->isOne();
3963 
3964   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3965   switch (Cnt) {
3966   case TargetLowering::ZeroOrOneBooleanContent:
3967     // An extended value of 1 is always true, unless its original type is i1,
3968     // in which case it will be sign extended to -1.
3969     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3970   case TargetLowering::UndefinedBooleanContent:
3971   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3972     return N->isAllOnes() && SExt;
3973   }
3974   llvm_unreachable("Unexpected enumeration.");
3975 }
3976 
3977 /// This helper function of SimplifySetCC tries to optimize the comparison when
3978 /// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize: if only N1 is an AND, swap so the AND is on the LHS and the
  // matching below need only inspect N0.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only integer (X & Y) eq/ne comparisons are handled here.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    // The narrow type is sized so the masked bit lands in the sign position.
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // it's liable to create an infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
4071 
4072 /// There are multiple IR patterns that could be checking whether certain
4073 /// truncation of a signed number would be lossy or not. The pattern which is
4074 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
4075 /// We are looking for the following pattern: (KeptBits is a constant)
4076 ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4077 /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4078 /// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
4079 /// We will unfold it into the natural trunc+sext pattern:
4080 ///   ((%x << C) a>> C) dstcond %x
4081 /// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned range check onto an eq/ne check, canonicalizing the
  // non-strict predicates (ule/ugt) by bumping the compared constant by one.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The setcc constant must be exactly twice the add constant, i.e.
  // 1 << KeptBits vs 1 << (KeptBits - 1), for the pattern to match.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into:  sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      ISD::SIGN_EXTEND_INREG, DL, XVT, X,
      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
}
4163 
4164 // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'.
  // Note: on success the lambda fills in NewShiftOpcode, C and Y (captured by
  // reference); X must already hold the other AND operand before each call.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    // Hoisting the constant reverses the shift direction.
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    // Let the target decide whether the rewritten form is profitable.
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
4232 
4233 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4234 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4235 /// handle the commuted versions of these patterns.
4236 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4237                                            ISD::CondCode Cond, const SDLoc &DL,
4238                                            DAGCombinerInfo &DCI) const {
4239   unsigned BOpcode = N0.getOpcode();
4240   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4241          "Unexpected binop");
4242   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4243 
4244   // (X + Y) == X --> Y == 0
4245   // (X - Y) == X --> Y == 0
4246   // (X ^ Y) == X --> Y == 0
4247   SelectionDAG &DAG = DCI.DAG;
4248   EVT OpVT = N0.getValueType();
4249   SDValue X = N0.getOperand(0);
4250   SDValue Y = N0.getOperand(1);
4251   if (X == N1)
4252     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4253 
4254   if (Y != N1)
4255     return SDValue();
4256 
4257   // (X + Y) == Y --> X == 0
4258   // (X ^ Y) == Y --> X == 0
4259   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4260     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4261 
4262   // The shift would not be valid if the operands are boolean (i1).
4263   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4264     return SDValue();
4265 
4266   // (X - Y) == Y --> X == Y << 1
4267   SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4268   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4269   if (!DCI.isCalledByLegalizer())
4270     DCI.AddToWorklist(YShl1.getNode());
4271   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4272 }
4273 
4274 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4275                                       SDValue N0, const APInt &C1,
4276                                       ISD::CondCode Cond, const SDLoc &dl,
4277                                       SelectionDAG &DAG) {
4278   // Look through truncs that don't change the value of a ctpop.
4279   // FIXME: Add vector support? Need to be careful with setcc result type below.
4280   SDValue CTPOP = N0;
4281   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4282       N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4283     CTPOP = N0.getOperand(0);
4284 
4285   if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4286     return SDValue();
4287 
4288   EVT CTVT = CTPOP.getValueType();
4289   SDValue CTOp = CTPOP.getOperand(0);
4290 
4291   // Expand a power-of-2-or-zero comparison based on ctpop:
4292   // (ctpop x) u< 2 -> (x & x-1) == 0
4293   // (ctpop x) u> 1 -> (x & x-1) != 0
4294   if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4295     // Keep the CTPOP if it is a cheap vector op.
4296     if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4297       return SDValue();
4298 
4299     unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4300     if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4301       return SDValue();
4302     if (C1 == 0 && (Cond == ISD::SETULT))
4303       return SDValue(); // This is handled elsewhere.
4304 
4305     unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4306 
4307     SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4308     SDValue Result = CTOp;
4309     for (unsigned i = 0; i < Passes; i++) {
4310       SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4311       Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4312     }
4313     ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4314     return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4315   }
4316 
4317   // Expand a power-of-2 comparison based on ctpop
4318   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4319     // Keep the CTPOP if it is cheap.
4320     if (TLI.isCtpopFast(CTVT))
4321       return SDValue();
4322 
4323     SDValue Zero = DAG.getConstant(0, dl, CTVT);
4324     SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4325     assert(CTVT.isInteger());
4326     SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4327 
4328     // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4329     // check before emitting a potentially unnecessary op.
4330     if (DAG.isKnownNeverZero(CTOp)) {
4331       // (ctpop x) == 1 --> (x & x-1) == 0
4332       // (ctpop x) != 1 --> (x & x-1) != 0
4333       SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4334       SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4335       return RHS;
4336     }
4337 
4338     // (ctpop x) == 1 --> (x ^ x-1) >  x-1
4339     // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4340     SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4341     ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4342     return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4343   }
4344 
4345   return SDValue();
4346 }
4347 
4348 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4349                                    ISD::CondCode Cond, const SDLoc &dl,
4350                                    SelectionDAG &DAG) {
4351   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4352     return SDValue();
4353 
4354   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4355   if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4356     return SDValue();
4357 
4358   auto getRotateSource = [](SDValue X) {
4359     if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4360       return X.getOperand(0);
4361     return SDValue();
4362   };
4363 
4364   // Peek through a rotated value compared against 0 or -1:
4365   // (rot X, Y) == 0/-1 --> X == 0/-1
4366   // (rot X, Y) != 0/-1 --> X != 0/-1
4367   if (SDValue R = getRotateSource(N0))
4368     return DAG.getSetCC(dl, VT, R, N1, Cond);
4369 
4370   // Peek through an 'or' of a rotated value compared against 0:
4371   // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4372   // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4373   //
4374   // TODO: Add the 'and' with -1 sibling.
4375   // TODO: Recurse through a series of 'or' ops to find the rotate.
4376   EVT OpVT = N0.getValueType();
4377   if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4378     if (SDValue R = getRotateSource(N0.getOperand(0))) {
4379       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4380       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4381     }
4382     if (SDValue R = getRotateSource(N0.getOperand(1))) {
4383       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4384       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4385     }
4386   }
4387 
4388   return SDValue();
4389 }
4390 
4391 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4392                                         ISD::CondCode Cond, const SDLoc &dl,
4393                                         SelectionDAG &DAG) {
4394   // If we are testing for all-bits-clear, we might be able to do that with
4395   // less shifting since bit-order does not matter.
4396   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4397     return SDValue();
4398 
4399   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4400   if (!C1 || !C1->isZero())
4401     return SDValue();
4402 
4403   if (!N0.hasOneUse() ||
4404       (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4405     return SDValue();
4406 
4407   unsigned BitWidth = N0.getScalarValueSizeInBits();
4408   auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4409   if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4410     return SDValue();
4411 
4412   // Canonicalize fshr as fshl to reduce pattern-matching.
4413   unsigned ShAmt = ShAmtC->getZExtValue();
4414   if (N0.getOpcode() == ISD::FSHR)
4415     ShAmt = BitWidth - ShAmt;
4416 
4417   // Match an 'or' with a specific operand 'Other' in either commuted variant.
4418   SDValue X, Y;
4419   auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4420     if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4421       return false;
4422     if (Or.getOperand(0) == Other) {
4423       X = Or.getOperand(0);
4424       Y = Or.getOperand(1);
4425       return true;
4426     }
4427     if (Or.getOperand(1) == Other) {
4428       X = Or.getOperand(1);
4429       Y = Or.getOperand(0);
4430       return true;
4431     }
4432     return false;
4433   };
4434 
4435   EVT OpVT = N0.getValueType();
4436   EVT ShAmtVT = N0.getOperand(2).getValueType();
4437   SDValue F0 = N0.getOperand(0);
4438   SDValue F1 = N0.getOperand(1);
4439   if (matchOr(F0, F1)) {
4440     // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4441     SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4442     SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4443     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4444     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4445   }
4446   if (matchOr(F1, F0)) {
4447     // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4448     SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4449     SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4450     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4451     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4452   }
4453 
4454   return SDValue();
4455 }
4456 
4457 /// Try to simplify a setcc built with the specified operands and cc. If it is
4458 /// unable to simplify it, return a null SDValue.
4459 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4460                                       ISD::CondCode Cond, bool foldBooleans,
4461                                       DAGCombinerInfo &DCI,
4462                                       const SDLoc &dl) const {
4463   SelectionDAG &DAG = DCI.DAG;
4464   const DataLayout &Layout = DAG.getDataLayout();
4465   EVT OpVT = N0.getValueType();
4466   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4467 
4468   // Constant fold or commute setcc.
4469   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4470     return Fold;
4471 
4472   bool N0ConstOrSplat =
4473       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4474   bool N1ConstOrSplat =
4475       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4476 
4477   // Canonicalize toward having the constant on the RHS.
4478   // TODO: Handle non-splat vector constants. All undef causes trouble.
4479   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4480   // infinite loop here when we encounter one.
4481   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4482   if (N0ConstOrSplat && !N1ConstOrSplat &&
4483       (DCI.isBeforeLegalizeOps() ||
4484        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4485     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4486 
4487   // If we have a subtract with the same 2 non-constant operands as this setcc
4488   // -- but in reverse order -- then try to commute the operands of this setcc
4489   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4490   // instruction on some targets.
4491   if (!N0ConstOrSplat && !N1ConstOrSplat &&
4492       (DCI.isBeforeLegalizeOps() ||
4493        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4494       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4495       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4496     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4497 
4498   if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4499     return V;
4500 
4501   if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4502     return V;
4503 
4504   if (auto *N1C = isConstOrConstSplat(N1)) {
4505     const APInt &C1 = N1C->getAPIntValue();
4506 
4507     // Optimize some CTPOP cases.
4508     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4509       return V;
4510 
4511     // For equality to 0 of a no-wrap multiply, decompose and test each op:
4512     // X * Y == 0 --> (X == 0) || (Y == 0)
4513     // X * Y != 0 --> (X != 0) && (Y != 0)
4514     // TODO: This bails out if minsize is set, but if the target doesn't have a
4515     //       single instruction multiply for this type, it would likely be
4516     //       smaller to decompose.
4517     if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4518         N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4519         (N0->getFlags().hasNoUnsignedWrap() ||
4520          N0->getFlags().hasNoSignedWrap()) &&
4521         !Attr.hasFnAttr(Attribute::MinSize)) {
4522       SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4523       SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4524       unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4525       return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4526     }
4527 
4528     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4529     // equality comparison, then we're just comparing whether X itself is
4530     // zero.
4531     if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4532         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4533         llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4534       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4535         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4536             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4537           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4538             // (srl (ctlz x), 5) == 0  -> X != 0
4539             // (srl (ctlz x), 5) != 1  -> X != 0
4540             Cond = ISD::SETNE;
4541           } else {
4542             // (srl (ctlz x), 5) != 0  -> X == 0
4543             // (srl (ctlz x), 5) == 1  -> X == 0
4544             Cond = ISD::SETEQ;
4545           }
4546           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4547           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4548                               Cond);
4549         }
4550       }
4551     }
4552   }
4553 
4554   // FIXME: Support vectors.
4555   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4556     const APInt &C1 = N1C->getAPIntValue();
4557 
4558     // (zext x) == C --> x == (trunc C)
4559     // (sext x) == C --> x == (trunc C)
4560     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4561         DCI.isBeforeLegalize() && N0->hasOneUse()) {
4562       unsigned MinBits = N0.getValueSizeInBits();
4563       SDValue PreExt;
4564       bool Signed = false;
4565       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4566         // ZExt
4567         MinBits = N0->getOperand(0).getValueSizeInBits();
4568         PreExt = N0->getOperand(0);
4569       } else if (N0->getOpcode() == ISD::AND) {
4570         // DAGCombine turns costly ZExts into ANDs
4571         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4572           if ((C->getAPIntValue()+1).isPowerOf2()) {
4573             MinBits = C->getAPIntValue().countr_one();
4574             PreExt = N0->getOperand(0);
4575           }
4576       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4577         // SExt
4578         MinBits = N0->getOperand(0).getValueSizeInBits();
4579         PreExt = N0->getOperand(0);
4580         Signed = true;
4581       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4582         // ZEXTLOAD / SEXTLOAD
4583         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4584           MinBits = LN0->getMemoryVT().getSizeInBits();
4585           PreExt = N0;
4586         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4587           Signed = true;
4588           MinBits = LN0->getMemoryVT().getSizeInBits();
4589           PreExt = N0;
4590         }
4591       }
4592 
4593       // Figure out how many bits we need to preserve this constant.
4594       unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4595 
4596       // Make sure we're not losing bits from the constant.
4597       if (MinBits > 0 &&
4598           MinBits < C1.getBitWidth() &&
4599           MinBits >= ReqdBits) {
4600         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4601         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4602           // Will get folded away.
4603           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4604           if (MinBits == 1 && C1 == 1)
4605             // Invert the condition.
4606             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4607                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4608           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4609           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4610         }
4611 
4612         // If truncating the setcc operands is not desirable, we can still
4613         // simplify the expression in some cases:
4614         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4615         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4616         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4617         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4618         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4619         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4620         SDValue TopSetCC = N0->getOperand(0);
4621         unsigned N0Opc = N0->getOpcode();
4622         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4623         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4624             TopSetCC.getOpcode() == ISD::SETCC &&
4625             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4626             (isConstFalseVal(N1) ||
4627              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4628 
4629           bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4630                          (!N1C->isZero() && Cond == ISD::SETNE);
4631 
4632           if (!Inverse)
4633             return TopSetCC;
4634 
4635           ISD::CondCode InvCond = ISD::getSetCCInverse(
4636               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4637               TopSetCC.getOperand(0).getValueType());
4638           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4639                                       TopSetCC.getOperand(1),
4640                                       InvCond);
4641         }
4642       }
4643     }
4644 
4645     // If the LHS is '(and load, const)', the RHS is 0, the test is for
4646     // equality or unsigned, and all 1 bits of the const are in the same
4647     // partial word, see if we can shorten the load.
4648     if (DCI.isBeforeLegalize() &&
4649         !ISD::isSignedIntSetCC(Cond) &&
4650         N0.getOpcode() == ISD::AND && C1 == 0 &&
4651         N0.getNode()->hasOneUse() &&
4652         isa<LoadSDNode>(N0.getOperand(0)) &&
4653         N0.getOperand(0).getNode()->hasOneUse() &&
4654         isa<ConstantSDNode>(N0.getOperand(1))) {
4655       auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4656       APInt bestMask;
4657       unsigned bestWidth = 0, bestOffset = 0;
4658       if (Lod->isSimple() && Lod->isUnindexed() &&
4659           (Lod->getMemoryVT().isByteSized() ||
4660            isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4661         unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4662         unsigned origWidth = N0.getValueSizeInBits();
4663         unsigned maskWidth = origWidth;
4664         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4665         // 8 bits, but have to be careful...
4666         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4667           origWidth = Lod->getMemoryVT().getSizeInBits();
4668         const APInt &Mask = N0.getConstantOperandAPInt(1);
4669         // Only consider power-of-2 widths (and at least one byte) as candiates
4670         // for the narrowed load.
4671         for (unsigned width = 8; width < origWidth; width *= 2) {
4672           EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4673           if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4674             continue;
4675           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4676           // Avoid accessing any padding here for now (we could use memWidth
4677           // instead of origWidth here otherwise).
4678           unsigned maxOffset = origWidth - width;
4679           for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4680             if (Mask.isSubsetOf(newMask)) {
4681               unsigned ptrOffset =
4682                   Layout.isLittleEndian() ? offset : memWidth - width - offset;
4683               unsigned IsFast = 0;
4684               Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4685               if (allowsMemoryAccess(
4686                       *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4687                       NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4688                   IsFast) {
4689                 bestOffset = ptrOffset / 8;
4690                 bestMask = Mask.lshr(offset);
4691                 bestWidth = width;
4692                 break;
4693               }
4694             }
4695             newMask <<= 8;
4696           }
4697           if (bestWidth)
4698             break;
4699         }
4700       }
4701       if (bestWidth) {
4702         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4703         SDValue Ptr = Lod->getBasePtr();
4704         if (bestOffset != 0)
4705           Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4706         SDValue NewLoad =
4707             DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4708                         Lod->getPointerInfo().getWithOffset(bestOffset),
4709                         Lod->getOriginalAlign());
4710         SDValue And =
4711             DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4712                         DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4713         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4714       }
4715     }
4716 
4717     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4718     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4719       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4720 
4721       // If the comparison constant has bits in the upper part, the
4722       // zero-extended value could never match.
4723       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4724                                               C1.getBitWidth() - InSize))) {
4725         switch (Cond) {
4726         case ISD::SETUGT:
4727         case ISD::SETUGE:
4728         case ISD::SETEQ:
4729           return DAG.getConstant(0, dl, VT);
4730         case ISD::SETULT:
4731         case ISD::SETULE:
4732         case ISD::SETNE:
4733           return DAG.getConstant(1, dl, VT);
4734         case ISD::SETGT:
4735         case ISD::SETGE:
4736           // True if the sign bit of C1 is set.
4737           return DAG.getConstant(C1.isNegative(), dl, VT);
4738         case ISD::SETLT:
4739         case ISD::SETLE:
4740           // True if the sign bit of C1 isn't set.
4741           return DAG.getConstant(C1.isNonNegative(), dl, VT);
4742         default:
4743           break;
4744         }
4745       }
4746 
4747       // Otherwise, we can perform the comparison with the low bits.
4748       switch (Cond) {
4749       case ISD::SETEQ:
4750       case ISD::SETNE:
4751       case ISD::SETUGT:
4752       case ISD::SETUGE:
4753       case ISD::SETULT:
4754       case ISD::SETULE: {
4755         EVT newVT = N0.getOperand(0).getValueType();
4756         // FIXME: Should use isNarrowingProfitable.
4757         if (DCI.isBeforeLegalizeOps() ||
4758             (isOperationLegal(ISD::SETCC, newVT) &&
4759              isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4760              isTypeDesirableForOp(ISD::SETCC, newVT))) {
4761           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4762           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4763 
4764           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4765                                           NewConst, Cond);
4766           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4767         }
4768         break;
4769       }
4770       default:
4771         break; // todo, be more careful with signed comparisons
4772       }
4773     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4774                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4775                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4776                                       OpVT)) {
4777       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4778       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4779       EVT ExtDstTy = N0.getValueType();
4780       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4781 
4782       // If the constant doesn't fit into the number of bits for the source of
4783       // the sign extension, it is impossible for both sides to be equal.
4784       if (C1.getSignificantBits() > ExtSrcTyBits)
4785         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4786 
4787       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4788              ExtDstTy != ExtSrcTy && "Unexpected types!");
4789       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4790       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4791                                    DAG.getConstant(Imm, dl, ExtDstTy));
4792       if (!DCI.isCalledByLegalizer())
4793         DCI.AddToWorklist(ZextOp.getNode());
4794       // Otherwise, make this a use of a zext.
4795       return DAG.getSetCC(dl, VT, ZextOp,
4796                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4797     } else if ((N1C->isZero() || N1C->isOne()) &&
4798                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4799       // SETCC (X), [0|1], [EQ|NE]  -> X if X is known 0/1. i1 types are
4800       // excluded as they are handled below whilst checking for foldBooleans.
4801       if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4802           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4803           (N0.getValueType() == MVT::i1 ||
4804            getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4805           DAG.MaskedValueIsZero(
4806               N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4807         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4808         if (TrueWhenTrue)
4809           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4810         // Invert the condition.
4811         if (N0.getOpcode() == ISD::SETCC) {
4812           ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4813           CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4814           if (DCI.isBeforeLegalizeOps() ||
4815               isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4816             return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4817         }
4818       }
4819 
4820       if ((N0.getOpcode() == ISD::XOR ||
4821            (N0.getOpcode() == ISD::AND &&
4822             N0.getOperand(0).getOpcode() == ISD::XOR &&
4823             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4824           isOneConstant(N0.getOperand(1))) {
4825         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4826         // can only do this if the top bits are known zero.
4827         unsigned BitWidth = N0.getValueSizeInBits();
4828         if (DAG.MaskedValueIsZero(N0,
4829                                   APInt::getHighBitsSet(BitWidth,
4830                                                         BitWidth-1))) {
4831           // Okay, get the un-inverted input value.
4832           SDValue Val;
4833           if (N0.getOpcode() == ISD::XOR) {
4834             Val = N0.getOperand(0);
4835           } else {
4836             assert(N0.getOpcode() == ISD::AND &&
4837                     N0.getOperand(0).getOpcode() == ISD::XOR);
4838             // ((X^1)&1)^1 -> X & 1
4839             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4840                               N0.getOperand(0).getOperand(0),
4841                               N0.getOperand(1));
4842           }
4843 
4844           return DAG.getSetCC(dl, VT, Val, N1,
4845                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4846         }
4847       } else if (N1C->isOne()) {
4848         SDValue Op0 = N0;
4849         if (Op0.getOpcode() == ISD::TRUNCATE)
4850           Op0 = Op0.getOperand(0);
4851 
4852         if ((Op0.getOpcode() == ISD::XOR) &&
4853             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4854             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4855           SDValue XorLHS = Op0.getOperand(0);
4856           SDValue XorRHS = Op0.getOperand(1);
4857           // Ensure that the input setccs return an i1 type or 0/1 value.
4858           if (Op0.getValueType() == MVT::i1 ||
4859               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4860                       ZeroOrOneBooleanContent &&
4861                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4862                         ZeroOrOneBooleanContent)) {
4863             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4864             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4865             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4866           }
4867         }
4868         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4869           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4870           if (Op0.getValueType().bitsGT(VT))
4871             Op0 = DAG.getNode(ISD::AND, dl, VT,
4872                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4873                           DAG.getConstant(1, dl, VT));
4874           else if (Op0.getValueType().bitsLT(VT))
4875             Op0 = DAG.getNode(ISD::AND, dl, VT,
4876                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4877                         DAG.getConstant(1, dl, VT));
4878 
4879           return DAG.getSetCC(dl, VT, Op0,
4880                               DAG.getConstant(0, dl, Op0.getValueType()),
4881                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4882         }
4883         if (Op0.getOpcode() == ISD::AssertZext &&
4884             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4885           return DAG.getSetCC(dl, VT, Op0,
4886                               DAG.getConstant(0, dl, Op0.getValueType()),
4887                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4888       }
4889     }
4890 
4891     // Given:
4892     //   icmp eq/ne (urem %x, %y), 0
4893     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4894     //   icmp eq/ne %x, 0
4895     if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4896         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4897       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4898       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4899       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4900         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4901     }
4902 
4903     // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4904     //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4905     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4906         N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4907         N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4908         N1C->isAllOnes()) {
4909       return DAG.getSetCC(dl, VT, N0.getOperand(0),
4910                           DAG.getConstant(0, dl, OpVT),
4911                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4912     }
4913 
4914     if (SDValue V =
4915             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4916       return V;
4917   }
4918 
4919   // These simplifications apply to splat vectors as well.
4920   // TODO: Handle more splat vector cases.
4921   if (auto *N1C = isConstOrConstSplat(N1)) {
4922     const APInt &C1 = N1C->getAPIntValue();
4923 
4924     APInt MinVal, MaxVal;
4925     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4926     if (ISD::isSignedIntSetCC(Cond)) {
4927       MinVal = APInt::getSignedMinValue(OperandBitSize);
4928       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4929     } else {
4930       MinVal = APInt::getMinValue(OperandBitSize);
4931       MaxVal = APInt::getMaxValue(OperandBitSize);
4932     }
4933 
4934     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4935     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4936       // X >= MIN --> true
4937       if (C1 == MinVal)
4938         return DAG.getBoolConstant(true, dl, VT, OpVT);
4939 
4940       if (!VT.isVector()) { // TODO: Support this for vectors.
4941         // X >= C0 --> X > (C0 - 1)
4942         APInt C = C1 - 1;
4943         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4944         if ((DCI.isBeforeLegalizeOps() ||
4945              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4946             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4947                                   isLegalICmpImmediate(C.getSExtValue())))) {
4948           return DAG.getSetCC(dl, VT, N0,
4949                               DAG.getConstant(C, dl, N1.getValueType()),
4950                               NewCC);
4951         }
4952       }
4953     }
4954 
4955     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4956       // X <= MAX --> true
4957       if (C1 == MaxVal)
4958         return DAG.getBoolConstant(true, dl, VT, OpVT);
4959 
4960       // X <= C0 --> X < (C0 + 1)
4961       if (!VT.isVector()) { // TODO: Support this for vectors.
4962         APInt C = C1 + 1;
4963         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4964         if ((DCI.isBeforeLegalizeOps() ||
4965              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4966             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4967                                   isLegalICmpImmediate(C.getSExtValue())))) {
4968           return DAG.getSetCC(dl, VT, N0,
4969                               DAG.getConstant(C, dl, N1.getValueType()),
4970                               NewCC);
4971         }
4972       }
4973     }
4974 
4975     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4976       if (C1 == MinVal)
4977         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4978 
4979       // TODO: Support this for vectors after legalize ops.
4980       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4981         // Canonicalize setlt X, Max --> setne X, Max
4982         if (C1 == MaxVal)
4983           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4984 
4985         // If we have setult X, 1, turn it into seteq X, 0
4986         if (C1 == MinVal+1)
4987           return DAG.getSetCC(dl, VT, N0,
4988                               DAG.getConstant(MinVal, dl, N0.getValueType()),
4989                               ISD::SETEQ);
4990       }
4991     }
4992 
4993     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4994       if (C1 == MaxVal)
4995         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4996 
4997       // TODO: Support this for vectors after legalize ops.
4998       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4999         // Canonicalize setgt X, Min --> setne X, Min
5000         if (C1 == MinVal)
5001           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5002 
5003         // If we have setugt X, Max-1, turn it into seteq X, Max
5004         if (C1 == MaxVal-1)
5005           return DAG.getSetCC(dl, VT, N0,
5006                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
5007                               ISD::SETEQ);
5008       }
5009     }
5010 
5011     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5012       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
5013       if (C1.isZero())
5014         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5015                 VT, N0, N1, Cond, DCI, dl))
5016           return CC;
5017 
5018       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5019       // For example, when high 32-bits of i64 X are known clear:
5020       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
5021       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
5022       bool CmpZero = N1C->isZero();
5023       bool CmpNegOne = N1C->isAllOnes();
5024       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5025         // Match or(lo,shl(hi,bw/2)) pattern.
5026         auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5027           unsigned EltBits = V.getScalarValueSizeInBits();
5028           if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5029             return false;
5030           SDValue LHS = V.getOperand(0);
5031           SDValue RHS = V.getOperand(1);
5032           APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5033           // Unshifted element must have zero upperbits.
5034           if (RHS.getOpcode() == ISD::SHL &&
5035               isa<ConstantSDNode>(RHS.getOperand(1)) &&
5036               RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5037               DAG.MaskedValueIsZero(LHS, HiBits)) {
5038             Lo = LHS;
5039             Hi = RHS.getOperand(0);
5040             return true;
5041           }
5042           if (LHS.getOpcode() == ISD::SHL &&
5043               isa<ConstantSDNode>(LHS.getOperand(1)) &&
5044               LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5045               DAG.MaskedValueIsZero(RHS, HiBits)) {
5046             Lo = RHS;
5047             Hi = LHS.getOperand(0);
5048             return true;
5049           }
5050           return false;
5051         };
5052 
5053         auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5054           unsigned EltBits = N0.getScalarValueSizeInBits();
5055           unsigned HalfBits = EltBits / 2;
5056           APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5057           SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5058           SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5059           SDValue NewN0 =
5060               DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5061           SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5062           return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5063         };
5064 
5065         SDValue Lo, Hi;
5066         if (IsConcat(N0, Lo, Hi))
5067           return MergeConcat(Lo, Hi);
5068 
5069         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5070           SDValue Lo0, Lo1, Hi0, Hi1;
5071           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5072               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5073             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5074                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5075           }
5076         }
5077       }
5078     }
5079 
5080     // If we have "setcc X, C0", check to see if we can shrink the immediate
5081     // by changing cc.
5082     // TODO: Support this for vectors after legalize ops.
5083     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5084       // SETUGT X, SINTMAX  -> SETLT X, 0
5085       // SETUGE X, SINTMIN -> SETLT X, 0
5086       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5087           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5088         return DAG.getSetCC(dl, VT, N0,
5089                             DAG.getConstant(0, dl, N1.getValueType()),
5090                             ISD::SETLT);
5091 
5092       // SETULT X, SINTMIN  -> SETGT X, -1
5093       // SETULE X, SINTMAX  -> SETGT X, -1
5094       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5095           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5096         return DAG.getSetCC(dl, VT, N0,
5097                             DAG.getAllOnesConstant(dl, N1.getValueType()),
5098                             ISD::SETGT);
5099     }
5100   }
5101 
5102   // Back to non-vector simplifications.
5103   // TODO: Can we do these for vector splats?
5104   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5105     const APInt &C1 = N1C->getAPIntValue();
5106     EVT ShValTy = N0.getValueType();
5107 
5108     // Fold bit comparisons when we can. This will result in an
5109     // incorrect value when boolean false is negative one, unless
5110     // the bitsize is 1 in which case the false value is the same
5111     // in practice regardless of the representation.
5112     if ((VT.getSizeInBits() == 1 ||
5113          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5114         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5115         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5116         N0.getOpcode() == ISD::AND) {
5117       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5118         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
5119           // Perform the xform if the AND RHS is a single bit.
5120           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5121           if (AndRHS->getAPIntValue().isPowerOf2() &&
5122               !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5123             return DAG.getNode(
5124                 ISD::TRUNCATE, dl, VT,
5125                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5126                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5127           }
5128         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5129           // (X & 8) == 8  -->  (X & 8) >> 3
5130           // Perform the xform if C1 is a single bit.
5131           unsigned ShCt = C1.logBase2();
5132           if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5133             return DAG.getNode(
5134                 ISD::TRUNCATE, dl, VT,
5135                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5136                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5137           }
5138         }
5139       }
5140     }
5141 
5142     if (C1.getSignificantBits() <= 64 &&
5143         !isLegalICmpImmediate(C1.getSExtValue())) {
5144       // (X & -256) == 256 -> (X >> 8) == 1
5145       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5146           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5147         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5148           const APInt &AndRHSC = AndRHS->getAPIntValue();
5149           if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5150             unsigned ShiftBits = AndRHSC.countr_zero();
5151             if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5152               SDValue Shift = DAG.getNode(
5153                   ISD::SRL, dl, ShValTy, N0.getOperand(0),
5154                   DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5155               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5156               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5157             }
5158           }
5159         }
5160       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5161                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5162         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5163         // X <  0x100000000 -> (X >> 32) <  1
5164         // X >= 0x100000000 -> (X >> 32) >= 1
5165         // X <= 0x0ffffffff -> (X >> 32) <  1
5166         // X >  0x0ffffffff -> (X >> 32) >= 1
5167         unsigned ShiftBits;
5168         APInt NewC = C1;
5169         ISD::CondCode NewCond = Cond;
5170         if (AdjOne) {
5171           ShiftBits = C1.countr_one();
5172           NewC = NewC + 1;
5173           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5174         } else {
5175           ShiftBits = C1.countr_zero();
5176         }
5177         NewC.lshrInPlace(ShiftBits);
5178         if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5179             isLegalICmpImmediate(NewC.getSExtValue()) &&
5180             !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5181           SDValue Shift =
5182               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5183                           DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5184           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5185           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5186         }
5187       }
5188     }
5189   }
5190 
5191   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5192     auto *CFP = cast<ConstantFPSDNode>(N1);
5193     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5194 
5195     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
5196     // constant if knowing that the operand is non-nan is enough.  We prefer to
5197     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5198     // materialize 0.0.
5199     if (Cond == ISD::SETO || Cond == ISD::SETUO)
5200       return DAG.getSetCC(dl, VT, N0, N0, Cond);
5201 
5202     // setcc (fneg x), C -> setcc swap(pred) x, -C
5203     if (N0.getOpcode() == ISD::FNEG) {
5204       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5205       if (DCI.isBeforeLegalizeOps() ||
5206           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5207         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5208         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5209       }
5210     }
5211 
5212     // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5213     if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5214         !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5215       bool IsFabs = N0.getOpcode() == ISD::FABS;
5216       SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5217       if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5218         FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5219                                              : (IsFabs ? fcInf : fcPosInf);
5220         if (Cond == ISD::SETUEQ)
5221           Flag |= fcNan;
5222         return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5223                            DAG.getTargetConstant(Flag, dl, MVT::i32));
5224       }
5225     }
5226 
5227     // If the condition is not legal, see if we can find an equivalent one
5228     // which is legal.
5229     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5230       // If the comparison was an awkward floating-point == or != and one of
5231       // the comparison operands is infinity or negative infinity, convert the
5232       // condition to a less-awkward <= or >=.
5233       if (CFP->getValueAPF().isInfinity()) {
5234         bool IsNegInf = CFP->getValueAPF().isNegative();
5235         ISD::CondCode NewCond = ISD::SETCC_INVALID;
5236         switch (Cond) {
5237         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5238         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5239         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5240         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5241         default: break;
5242         }
5243         if (NewCond != ISD::SETCC_INVALID &&
5244             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5245           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5246       }
5247     }
5248   }
5249 
5250   if (N0 == N1) {
5251     // The sext(setcc()) => setcc() optimization relies on the appropriate
5252     // constant being emitted.
5253     assert(!N0.getValueType().isInteger() &&
5254            "Integer types should be handled by FoldSetCC");
5255 
5256     bool EqTrue = ISD::isTrueWhenEqual(Cond);
5257     unsigned UOF = ISD::getUnorderedFlavor(Cond);
5258     if (UOF == 2) // FP operators that are undefined on NaNs.
5259       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5260     if (UOF == unsigned(EqTrue))
5261       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5262     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
5263     // if it is not already.
5264     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5265     if (NewCond != Cond &&
5266         (DCI.isBeforeLegalizeOps() ||
5267                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5268       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5269   }
5270 
5271   // ~X > ~Y --> Y > X
5272   // ~X < ~Y --> Y < X
5273   // ~X < C --> X > ~C
5274   // ~X > C --> X < ~C
5275   if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5276       N0.getValueType().isInteger()) {
5277     if (isBitwiseNot(N0)) {
5278       if (isBitwiseNot(N1))
5279         return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5280 
5281       if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5282           !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5283         SDValue Not = DAG.getNOT(dl, N1, OpVT);
5284         return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5285       }
5286     }
5287   }
5288 
5289   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5290       N0.getValueType().isInteger()) {
5291     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5292         N0.getOpcode() == ISD::XOR) {
5293       // Simplify (X+Y) == (X+Z) -->  Y == Z
5294       if (N0.getOpcode() == N1.getOpcode()) {
5295         if (N0.getOperand(0) == N1.getOperand(0))
5296           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5297         if (N0.getOperand(1) == N1.getOperand(1))
5298           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5299         if (isCommutativeBinOp(N0.getOpcode())) {
5300           // If X op Y == Y op X, try other combinations.
5301           if (N0.getOperand(0) == N1.getOperand(1))
5302             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5303                                 Cond);
5304           if (N0.getOperand(1) == N1.getOperand(0))
5305             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5306                                 Cond);
5307         }
5308       }
5309 
5310       // If RHS is a legal immediate value for a compare instruction, we need
5311       // to be careful about increasing register pressure needlessly.
5312       bool LegalRHSImm = false;
5313 
5314       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5315         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5316           // Turn (X+C1) == C2 --> X == C2-C1
5317           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5318             return DAG.getSetCC(
5319                 dl, VT, N0.getOperand(0),
5320                 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5321                                 dl, N0.getValueType()),
5322                 Cond);
5323 
5324           // Turn (X^C1) == C2 --> X == C1^C2
5325           if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5326             return DAG.getSetCC(
5327                 dl, VT, N0.getOperand(0),
5328                 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5329                                 dl, N0.getValueType()),
5330                 Cond);
5331         }
5332 
5333         // Turn (C1-X) == C2 --> X == C1-C2
5334         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5335           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5336             return DAG.getSetCC(
5337                 dl, VT, N0.getOperand(1),
5338                 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5339                                 dl, N0.getValueType()),
5340                 Cond);
5341 
5342         // Could RHSC fold directly into a compare?
5343         if (RHSC->getValueType(0).getSizeInBits() <= 64)
5344           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5345       }
5346 
5347       // (X+Y) == X --> Y == 0 and similar folds.
5348       // Don't do this if X is an immediate that can fold into a cmp
5349       // instruction and X+Y has other uses. It could be an induction variable
5350       // chain, and the transform would increase register pressure.
5351       if (!LegalRHSImm || N0.hasOneUse())
5352         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5353           return V;
5354     }
5355 
5356     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5357         N1.getOpcode() == ISD::XOR)
5358       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5359         return V;
5360 
5361     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5362       return V;
5363   }
5364 
5365   // Fold remainder of division by a constant.
5366   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5367       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5368     // When division is cheap or optimizing for minimum size,
5369     // fall through to DIVREM creation by skipping this fold.
5370     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5371       if (N0.getOpcode() == ISD::UREM) {
5372         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5373           return Folded;
5374       } else if (N0.getOpcode() == ISD::SREM) {
5375         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5376           return Folded;
5377       }
5378     }
5379   }
5380 
5381   // Fold away ALL boolean setcc's.
5382   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5383     SDValue Temp;
5384     switch (Cond) {
5385     default: llvm_unreachable("Unknown integer setcc!");
5386     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
5387       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5388       N0 = DAG.getNOT(dl, Temp, OpVT);
5389       if (!DCI.isCalledByLegalizer())
5390         DCI.AddToWorklist(Temp.getNode());
5391       break;
5392     case ISD::SETNE:  // X != Y   -->  (X^Y)
5393       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5394       break;
5395     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
5396     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
5397       Temp = DAG.getNOT(dl, N0, OpVT);
5398       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5399       if (!DCI.isCalledByLegalizer())
5400         DCI.AddToWorklist(Temp.getNode());
5401       break;
5402     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
5403     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
5404       Temp = DAG.getNOT(dl, N1, OpVT);
5405       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5406       if (!DCI.isCalledByLegalizer())
5407         DCI.AddToWorklist(Temp.getNode());
5408       break;
5409     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
5410     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
5411       Temp = DAG.getNOT(dl, N0, OpVT);
5412       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5413       if (!DCI.isCalledByLegalizer())
5414         DCI.AddToWorklist(Temp.getNode());
5415       break;
5416     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
5417     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
5418       Temp = DAG.getNOT(dl, N1, OpVT);
5419       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5420       break;
5421     }
5422     if (VT.getScalarType() != MVT::i1) {
5423       if (!DCI.isCalledByLegalizer())
5424         DCI.AddToWorklist(N0.getNode());
5425       // FIXME: If running after legalize, we probably can't do this.
5426       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5427       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5428     }
5429     return N0;
5430   }
5431 
5432   // Could not fold it.
5433   return SDValue();
5434 }
5435 
5436 /// Returns true (and the GlobalValue and the offset) if the node is a
5437 /// GlobalAddress + offset.
5438 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5439                                     int64_t &Offset) const {
5440 
5441   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5442 
5443   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5444     GA = GASD->getGlobal();
5445     Offset += GASD->getOffset();
5446     return true;
5447   }
5448 
5449   if (N->getOpcode() == ISD::ADD) {
5450     SDValue N1 = N->getOperand(0);
5451     SDValue N2 = N->getOperand(1);
5452     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5453       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5454         Offset += V->getSExtValue();
5455         return true;
5456       }
5457     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5458       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5459         Offset += V->getSExtValue();
5460         return true;
5461       }
5462     }
5463   }
5464 
5465   return false;
5466 }
5467 
5468 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5469                                           DAGCombinerInfo &DCI) const {
5470   // Default implementation: no optimization.
5471   return SDValue();
5472 }
5473 
5474 //===----------------------------------------------------------------------===//
5475 //  Inline Assembler Implementation Methods
5476 //===----------------------------------------------------------------------===//
5477 
5478 TargetLowering::ConstraintType
5479 TargetLowering::getConstraintType(StringRef Constraint) const {
5480   unsigned S = Constraint.size();
5481 
5482   if (S == 1) {
5483     switch (Constraint[0]) {
5484     default: break;
5485     case 'r':
5486       return C_RegisterClass;
5487     case 'm': // memory
5488     case 'o': // offsetable
5489     case 'V': // not offsetable
5490       return C_Memory;
5491     case 'p': // Address.
5492       return C_Address;
5493     case 'n': // Simple Integer
5494     case 'E': // Floating Point Constant
5495     case 'F': // Floating Point Constant
5496       return C_Immediate;
5497     case 'i': // Simple Integer or Relocatable Constant
5498     case 's': // Relocatable Constant
5499     case 'X': // Allow ANY value.
5500     case 'I': // Target registers.
5501     case 'J':
5502     case 'K':
5503     case 'L':
5504     case 'M':
5505     case 'N':
5506     case 'O':
5507     case 'P':
5508     case '<':
5509     case '>':
5510       return C_Other;
5511     }
5512   }
5513 
5514   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5515     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5516       return C_Memory;
5517     return C_Register;
5518   }
5519   return C_Unknown;
5520 }
5521 
5522 /// Try to replace an X constraint, which matches anything, with another that
5523 /// has more specific requirements based on the type of the corresponding
5524 /// operand.
5525 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5526   if (ConstraintVT.isInteger())
5527     return "r";
5528   if (ConstraintVT.isFloatingPoint())
5529     return "f"; // works for many targets
5530   return nullptr;
5531 }
5532 
5533 SDValue TargetLowering::LowerAsmOutputForConstraint(
5534     SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5535     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5536   return SDValue();
5537 }
5538 
5539 /// Lower the specified operand into the Ops vector.
5540 /// If it is invalid, don't add anything to Ops.
5541 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5542                                                   StringRef Constraint,
5543                                                   std::vector<SDValue> &Ops,
5544                                                   SelectionDAG &DAG) const {
5545 
5546   if (Constraint.size() > 1)
5547     return;
5548 
5549   char ConstraintLetter = Constraint[0];
5550   switch (ConstraintLetter) {
5551   default: break;
5552   case 'X':    // Allows any operand
5553   case 'i':    // Simple Integer or Relocatable Constant
5554   case 'n':    // Simple Integer
5555   case 's': {  // Relocatable Constant
5556 
5557     ConstantSDNode *C;
5558     uint64_t Offset = 0;
5559 
5560     // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5561     // etc., since getelementpointer is variadic. We can't use
5562     // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5563     // while in this case the GA may be furthest from the root node which is
5564     // likely an ISD::ADD.
5565     while (true) {
5566       if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5567         // gcc prints these as sign extended.  Sign extend value to 64 bits
5568         // now; without this it would get ZExt'd later in
5569         // ScheduleDAGSDNodes::EmitNode, which is very generic.
5570         bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5571         BooleanContent BCont = getBooleanContents(MVT::i64);
5572         ISD::NodeType ExtOpc =
5573             IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5574         int64_t ExtVal =
5575             ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5576         Ops.push_back(
5577             DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5578         return;
5579       }
5580       if (ConstraintLetter != 'n') {
5581         if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5582           Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5583                                                    GA->getValueType(0),
5584                                                    Offset + GA->getOffset()));
5585           return;
5586         }
5587         if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5588           Ops.push_back(DAG.getTargetBlockAddress(
5589               BA->getBlockAddress(), BA->getValueType(0),
5590               Offset + BA->getOffset(), BA->getTargetFlags()));
5591           return;
5592         }
5593         if (isa<BasicBlockSDNode>(Op)) {
5594           Ops.push_back(Op);
5595           return;
5596         }
5597       }
5598       const unsigned OpCode = Op.getOpcode();
5599       if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5600         if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5601           Op = Op.getOperand(1);
5602         // Subtraction is not commutative.
5603         else if (OpCode == ISD::ADD &&
5604                  (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5605           Op = Op.getOperand(0);
5606         else
5607           return;
5608         Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5609         continue;
5610       }
5611       return;
5612     }
5613     break;
5614   }
5615   }
5616 }
5617 
5618 void TargetLowering::CollectTargetIntrinsicOperands(
5619     const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5620 }
5621 
5622 std::pair<unsigned, const TargetRegisterClass *>
5623 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5624                                              StringRef Constraint,
5625                                              MVT VT) const {
5626   if (!Constraint.starts_with("{"))
5627     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5628   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5629 
5630   // Remove the braces from around the name.
5631   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5632 
5633   std::pair<unsigned, const TargetRegisterClass *> R =
5634       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5635 
5636   // Figure out which register class contains this reg.
5637   for (const TargetRegisterClass *RC : RI->regclasses()) {
5638     // If none of the value types for this register class are valid, we
5639     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5640     if (!isLegalRC(*RI, *RC))
5641       continue;
5642 
5643     for (const MCPhysReg &PR : *RC) {
5644       if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5645         std::pair<unsigned, const TargetRegisterClass *> S =
5646             std::make_pair(PR, RC);
5647 
5648         // If this register class has the requested value type, return it,
5649         // otherwise keep searching and return the first class found
5650         // if no other is found which explicitly has the requested type.
5651         if (RI->isTypeLegalForClass(*RC, VT))
5652           return S;
5653         if (!R.second)
5654           R = S;
5655       }
5656     }
5657   }
5658 
5659   return R;
5660 }
5661 
5662 //===----------------------------------------------------------------------===//
5663 // Constraint Selection.
5664 
5665 /// Return true of this is an input operand that is a matching constraint like
5666 /// "4".
5667 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5668   assert(!ConstraintCode.empty() && "No known constraint!");
5669   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5670 }
5671 
5672 /// If this is an input matching constraint, this method returns the output
5673 /// operand it matches.
5674 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5675   assert(!ConstraintCode.empty() && "No known constraint!");
5676   return atoi(ConstraintCode.c_str());
5677 }
5678 
5679 /// Split up the constraint string from the inline assembly value into the
5680 /// specific constraints and their prefixes, and also tie in the associated
5681 /// operand values.
5682 /// If this returns an empty vector, and if the constraint string itself
5683 /// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    // Start with no known value type; it is filled in below from either the
    // call's result type (outputs) or the operand's IR type.
    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple outputs are returned as a struct; each output maps to one
        // struct element, indexed by ResNo.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      // Labels come from a callbr's indirect destinations, not from the
      // argument list; note the `continue` so ArgNo is left untouched.
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // If this constraint consumed an operand value, derive its MVT from the
    // IR type (and bump ArgNo past the consumed call argument).
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are pointers; the pointee type is carried by the
        // elementtype attribute on the call site.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      // Fall back to MVT::Other when the IR type has no simple MVT.
      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One invalid operand weight invalidates the whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Differing VTs are still acceptable if both sides resolve to the
        // same register class and agree on int-or-FP-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5868 
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
5876 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5877   switch (CT) {
5878   case TargetLowering::C_Immediate:
5879   case TargetLowering::C_Other:
5880     return 4;
5881   case TargetLowering::C_Memory:
5882   case TargetLowering::C_Address:
5883     return 3;
5884   case TargetLowering::C_RegisterClass:
5885     return 2;
5886   case TargetLowering::C_Register:
5887     return 1;
5888   case TargetLowering::C_Unknown:
5889     return 0;
5890   }
5891   llvm_unreachable("Invalid constraint type");
5892 }
5893 
5894 /// Examine constraint type and operand type and determine a weight value.
5895 /// This object must already have been set up with the operand type
5896 /// and the current alternative constraint selected.
5897 TargetLowering::ConstraintWeight
5898   TargetLowering::getMultipleConstraintMatchWeight(
5899     AsmOperandInfo &info, int maIndex) const {
5900   InlineAsm::ConstraintCodeVector *rCodes;
5901   if (maIndex >= (int)info.multipleAlternatives.size())
5902     rCodes = &info.Codes;
5903   else
5904     rCodes = &info.multipleAlternatives[maIndex].Codes;
5905   ConstraintWeight BestWeight = CW_Invalid;
5906 
5907   // Loop over the options, keeping track of the most general one.
5908   for (const std::string &rCode : *rCodes) {
5909     ConstraintWeight weight =
5910         getSingleConstraintMatchWeight(info, rCode.c_str());
5911     if (weight > BestWeight)
5912       BestWeight = weight;
5913   }
5914 
5915   return BestWeight;
5916 }
5917 
5918 /// Examine constraint type and operand type and determine a weight value.
5919 /// This object must already have been set up with the operand type
5920 /// and the current alternative constraint selected.
5921 TargetLowering::ConstraintWeight
5922   TargetLowering::getSingleConstraintMatchWeight(
5923     AsmOperandInfo &info, const char *constraint) const {
5924   ConstraintWeight weight = CW_Invalid;
5925   Value *CallOperandVal = info.CallOperandVal;
5926     // If we don't have a value, we can't do a match,
5927     // but allow it at the lowest weight.
5928   if (!CallOperandVal)
5929     return CW_Default;
5930   // Look at the constraint type.
5931   switch (*constraint) {
5932     case 'i': // immediate integer.
5933     case 'n': // immediate integer with a known value.
5934       if (isa<ConstantInt>(CallOperandVal))
5935         weight = CW_Constant;
5936       break;
5937     case 's': // non-explicit intregal immediate.
5938       if (isa<GlobalValue>(CallOperandVal))
5939         weight = CW_Constant;
5940       break;
5941     case 'E': // immediate float if host format.
5942     case 'F': // immediate float.
5943       if (isa<ConstantFP>(CallOperandVal))
5944         weight = CW_Constant;
5945       break;
5946     case '<': // memory operand with autodecrement.
5947     case '>': // memory operand with autoincrement.
5948     case 'm': // memory operand.
5949     case 'o': // offsettable memory operand
5950     case 'V': // non-offsettable memory operand
5951       weight = CW_Memory;
5952       break;
5953     case 'r': // general register.
5954     case 'g': // general register, memory operand or immediate integer.
5955               // note: Clang converts "g" to "imr".
5956       if (CallOperandVal->getType()->isIntegerTy())
5957         weight = CW_Register;
5958       break;
5959     case 'X': // any operand.
5960   default:
5961     weight = CW_Default;
5962     break;
5963   }
5964   return weight;
5965 }
5966 
5967 /// If there are multiple different constraints that we could pick for this
5968 /// operand (e.g. "imr") try to pick the 'best' one.
5969 /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5970 /// into seven classes:
5971 ///    Register      -> one specific register
5972 ///    RegisterClass -> a group of regs
5973 ///    Memory        -> memory
5974 ///    Address       -> a symbolic memory reference
5975 ///    Immediate     -> immediate values
5976 ///    Other         -> magic values (such as "Flag Output Operands")
5977 ///    Unknown       -> something we don't recognize yet and can't handle
5978 /// Ideally, we would pick the most specific constraint possible: if we have
5979 /// something that fits into a register, we would pick it.  The problem here
5980 /// is that if we have something that could either be in a register or in
5981 /// memory that use of the register could cause selection of *other*
5982 /// operands to fail: they might only succeed if we pick memory.  Because of
5983 /// this the heuristic we use is:
5984 ///
5985 ///  1) If there is an 'other' constraint, and if the operand is valid for
5986 ///     that constraint, use it.  This makes us take advantage of 'i'
5987 ///     constraints when available.
5988 ///  2) Otherwise, pick the most general constraint present.  This prefers
5989 ///     'm' over 'r', for example.
5990 ///
5991 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5992     TargetLowering::AsmOperandInfo &OpInfo) const {
5993   ConstraintGroup Ret;
5994 
5995   Ret.reserve(OpInfo.Codes.size());
5996   for (StringRef Code : OpInfo.Codes) {
5997     TargetLowering::ConstraintType CType = getConstraintType(Code);
5998 
5999     // Indirect 'other' or 'immediate' constraints are not allowed.
6000     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6001                                CType == TargetLowering::C_Register ||
6002                                CType == TargetLowering::C_RegisterClass))
6003       continue;
6004 
6005     // Things with matching constraints can only be registers, per gcc
6006     // documentation.  This mainly affects "g" constraints.
6007     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6008       continue;
6009 
6010     Ret.emplace_back(Code, CType);
6011   }
6012 
6013   std::stable_sort(
6014       Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6015         return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6016       });
6017 
6018   return Ret;
6019 }
6020 
6021 /// If we have an immediate, see if we can lower it. Return true if we can,
6022 /// false otherwise.
6023 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6024                                      SDValue Op, SelectionDAG *DAG,
6025                                      const TargetLowering &TLI) {
6026 
6027   assert((P.second == TargetLowering::C_Other ||
6028           P.second == TargetLowering::C_Immediate) &&
6029          "need immediate or other");
6030 
6031   if (!Op.getNode())
6032     return false;
6033 
6034   std::vector<SDValue> ResultOps;
6035   TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6036   return !ResultOps.empty();
6037 }
6038 
6039 /// Determines the constraint code and constraint type to use for the specific
6040 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    // Multiple codes: sort them by preference and pick one.
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    // Walk the immediate-like candidates at the front of the sorted group,
    // stopping at the first one the target can actually lower Op for. If
    // none can be lowered, reset to index 0 so we still pick the
    // most-preferred constraint overall.
    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Basic blocks and block addresses become immediates (their address).
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6096 
6097 /// Given an exact SDIV by a constant, create a multiplication
6098 /// with the multiplicative inverse of the constant.
6099 /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6100 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6101                               const SDLoc &dl, SelectionDAG &DAG,
6102                               SmallVectorImpl<SDNode *> &Created) {
6103   SDValue Op0 = N->getOperand(0);
6104   SDValue Op1 = N->getOperand(1);
6105   EVT VT = N->getValueType(0);
6106   EVT SVT = VT.getScalarType();
6107   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6108   EVT ShSVT = ShVT.getScalarType();
6109 
6110   bool UseSRA = false;
6111   SmallVector<SDValue, 16> Shifts, Factors;
6112 
6113   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6114     if (C->isZero())
6115       return false;
6116     APInt Divisor = C->getAPIntValue();
6117     unsigned Shift = Divisor.countr_zero();
6118     if (Shift) {
6119       Divisor.ashrInPlace(Shift);
6120       UseSRA = true;
6121     }
6122     APInt Factor = Divisor.multiplicativeInverse();
6123     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6124     Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6125     return true;
6126   };
6127 
6128   // Collect all magic values from the build vector.
6129   if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6130     return SDValue();
6131 
6132   SDValue Shift, Factor;
6133   if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6134     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6135     Factor = DAG.getBuildVector(VT, dl, Factors);
6136   } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6137     assert(Shifts.size() == 1 && Factors.size() == 1 &&
6138            "Expected matchUnaryPredicate to return one element for scalable "
6139            "vectors");
6140     Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6141     Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6142   } else {
6143     assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6144     Shift = Shifts[0];
6145     Factor = Factors[0];
6146   }
6147 
6148   SDValue Res = Op0;
6149   if (UseSRA) {
6150     Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6151     Created.push_back(Res.getNode());
6152   }
6153 
6154   return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6155 }
6156 
6157 /// Given an exact UDIV by a constant, create a multiplication
6158 /// with the multiplicative inverse of the constant.
6159 /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6160 static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6161                               const SDLoc &dl, SelectionDAG &DAG,
6162                               SmallVectorImpl<SDNode *> &Created) {
6163   EVT VT = N->getValueType(0);
6164   EVT SVT = VT.getScalarType();
6165   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6166   EVT ShSVT = ShVT.getScalarType();
6167 
6168   bool UseSRL = false;
6169   SmallVector<SDValue, 16> Shifts, Factors;
6170 
6171   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6172     if (C->isZero())
6173       return false;
6174     APInt Divisor = C->getAPIntValue();
6175     unsigned Shift = Divisor.countr_zero();
6176     if (Shift) {
6177       Divisor.lshrInPlace(Shift);
6178       UseSRL = true;
6179     }
6180     // Calculate the multiplicative inverse modulo BW.
6181     APInt Factor = Divisor.multiplicativeInverse();
6182     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6183     Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6184     return true;
6185   };
6186 
6187   SDValue Op1 = N->getOperand(1);
6188 
6189   // Collect all magic values from the build vector.
6190   if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6191     return SDValue();
6192 
6193   SDValue Shift, Factor;
6194   if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6195     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6196     Factor = DAG.getBuildVector(VT, dl, Factors);
6197   } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6198     assert(Shifts.size() == 1 && Factors.size() == 1 &&
6199            "Expected matchUnaryPredicate to return one element for scalable "
6200            "vectors");
6201     Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6202     Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6203   } else {
6204     assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6205     Shift = Shifts[0];
6206     Factor = Factors[0];
6207   }
6208 
6209   SDValue Res = N->getOperand(0);
6210   if (UseSRL) {
6211     Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6212     Created.push_back(Res.getNode());
6213   }
6214 
6215   return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6216 }
6217 
6218 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6219                               SelectionDAG &DAG,
6220                               SmallVectorImpl<SDNode *> &Created) const {
6221   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6222   if (isIntDivCheap(N->getValueType(0), Attr))
6223     return SDValue(N, 0); // Lower SDIV as SDIV
6224   return SDValue();
6225 }
6226 
6227 SDValue
6228 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6229                               SelectionDAG &DAG,
6230                               SmallVectorImpl<SDNode *> &Created) const {
6231   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6232   if (isIntDivCheap(N->getValueType(0), Attr))
6233     return SDValue(N, 0); // Lower SREM as SREM
6234   return SDValue();
6235 }
6236 
6237 /// Build sdiv by power-of-2 with conditional move instructions
6238 /// Ref: "Hacker's Delight" by Henry Warren 10-1
6239 /// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6240 ///   bgez x, label
6241 ///   add x, x, 2**k-1
6242 /// label:
6243 ///   sra res, x, k
6244 ///   neg res, res (when the divisor is negative)
6245 SDValue TargetLowering::buildSDIVPow2WithCMov(
6246     SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6247     SmallVectorImpl<SDNode *> &Created) const {
6248   unsigned Lg2 = Divisor.countr_zero();
6249   EVT VT = N->getValueType(0);
6250 
6251   SDLoc DL(N);
6252   SDValue N0 = N->getOperand(0);
6253   SDValue Zero = DAG.getConstant(0, DL, VT);
6254   APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6255   SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6256 
6257   // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6258   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6259   SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6260   SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6261   SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6262 
6263   Created.push_back(Cmp.getNode());
6264   Created.push_back(Add.getNode());
6265   Created.push_back(CMov.getNode());
6266 
6267   // Divide by pow2.
6268   SDValue SRA =
6269       DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6270 
6271   // If we're dividing by a positive value, we're done.  Otherwise, we must
6272   // negate the result.
6273   if (Divisor.isNonNegative())
6274     return SRA;
6275 
6276   Created.push_back(SRA.getNode());
6277   return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6278 }
6279 
6280 /// Given an ISD::SDIV node expressing a divide by constant,
6281 /// return a DAG expression to select that will generate the same value by
6282 /// multiplying by a magic number.
6283 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be at least twice as wide so the full product
    // fits, and its MUL must be legal.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-element magic constants: the multiplier, an optional +/-1 numerator
  // correction factor, the final shift amount, and a mask that enables the
  // sign-bit addend (all-ones) or disables it (zero, for divisors +/-1).
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // A zero divisor means division-by-zero; bail out.
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Rebuild the collected scalars into operands matching N1's form
  // (build_vector, splat_vector, or scalar constant).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
         isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    // No way to compute the high half of the product; give up.
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6447 
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// \param N       The ISD::UDIV node being expanded.
/// \param IsAfterLegalization  True once operation legalization has run;
///        passed through to the isOperationLegalOrCustom queries below.
/// \param IsAfterLegalTypes    True once type legalization has run; gates the
///        widened-MUL fallback inside GetMULHU.
/// \param Created Receives every node built for the expansion so the caller
///        can add them to the combiner worklist.
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Only set when VT is illegal: the promoted integer type in which the wide
  // multiply will be performed (see the !isTypeLegal path below).
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must hold the full 2*EltBits product and have a
    // legal MUL, otherwise we cannot compute the high half of the product.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Per-divisor worker: derive the magic constants for one (non-zero)
  // divisor lane and append them to the vectors above. Returning false
  // aborts the whole fold.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      // Placeholder lanes; the final select on (N1 == 1) discards them.
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(
              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // NPQ lanes use MULHU by 2^(EltBits-1), which acts as SRL-by-1 on the
      // "not-paper-quotient" value; zero disables the NPQ path for the lane.
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    // NPQFactor is intentionally not set here: the scalar NPQ path below
    // uses a plain SRL-by-1 instead of a multiply.
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high EltBits of the unsigned X*Y product, or SDValue() if no
  // suitable operation (MULHU / UMUL_LOHI / wide MUL) is available.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of UMUL_LOHI is the high half of the product.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
         isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Lanes with divisor 1 were given undef magic values above; select the
  // original numerator for them and the computed quotient everywhere else.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6647 
6648 /// If all values in Values that *don't* match the predicate are same 'splat'
6649 /// value, then replace all values with that splat value.
6650 /// Else, if AlternativeReplacement was provided, then replace all values that
6651 /// do match predicate with AlternativeReplacement value.
6652 static void
6653 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6654                           std::function<bool(SDValue)> Predicate,
6655                           SDValue AlternativeReplacement = SDValue()) {
6656   SDValue Replacement;
6657   // Is there a value for which the Predicate does *NOT* match? What is it?
6658   auto SplatValue = llvm::find_if_not(Values, Predicate);
6659   if (SplatValue != Values.end()) {
6660     // Does Values consist only of SplatValue's and values matching Predicate?
6661     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6662           return Value == *SplatValue || Predicate(Value);
6663         })) // Then we shall replace values matching predicate with SplatValue.
6664       Replacement = *SplatValue;
6665   }
6666   if (!Replacement) {
6667     // Oops, we did not find the "baseline" splat value.
6668     if (!AlternativeReplacement)
6669       return; // Nothing to do.
6670     // Let's replace with provided value then.
6671     Replacement = AlternativeReplacement;
6672   }
6673   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6674 }
6675 
6676 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6677 /// where the divisor is constant and the comparison target is zero,
6678 /// return a DAG expression that will generate the same comparison result
6679 /// using only multiplications, additions and shifts/rotations.
6680 /// Ref: "Hacker's Delight" 10-17.
6681 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6682                                         SDValue CompTargetNode,
6683                                         ISD::CondCode Cond,
6684                                         DAGCombinerInfo &DCI,
6685                                         const SDLoc &DL) const {
6686   SmallVector<SDNode *, 5> Built;
6687   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6688                                          DCI, DL, Built)) {
6689     for (SDNode *N : Built)
6690       DCI.AddToWorklist(N);
6691     return Folded;
6692   }
6693 
6694   return SDValue();
6695 }
6696 
6697 SDValue
6698 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6699                                   SDValue CompTargetNode, ISD::CondCode Cond,
6700                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6701                                   SmallVectorImpl<SDNode *> &Created) const {
6702   // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6703   // - D must be constant, with D = D0 * 2^K where D0 is odd
6704   // - P is the multiplicative inverse of D0 modulo 2^W
6705   // - Q = floor(((2^W) - 1) / D)
6706   // where W is the width of the common type of N and D.
6707   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6708          "Only applicable for (in)equality comparisons.");
6709 
6710   SelectionDAG &DAG = DCI.DAG;
6711 
6712   EVT VT = REMNode.getValueType();
6713   EVT SVT = VT.getScalarType();
6714   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6715   EVT ShSVT = ShVT.getScalarType();
6716 
6717   // If MUL is unavailable, we cannot proceed in any case.
6718   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6719     return SDValue();
6720 
6721   bool ComparingWithAllZeros = true;
6722   bool AllComparisonsWithNonZerosAreTautological = true;
6723   bool HadTautologicalLanes = false;
6724   bool AllLanesAreTautological = true;
6725   bool HadEvenDivisor = false;
6726   bool AllDivisorsArePowerOfTwo = true;
6727   bool HadTautologicalInvertedLanes = false;
6728   SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6729 
6730   auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6731     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6732     if (CDiv->isZero())
6733       return false;
6734 
6735     const APInt &D = CDiv->getAPIntValue();
6736     const APInt &Cmp = CCmp->getAPIntValue();
6737 
6738     ComparingWithAllZeros &= Cmp.isZero();
6739 
6740     // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6741     // if C2 is not less than C1, the comparison is always false.
6742     // But we will only be able to produce the comparison that will give the
6743     // opposive tautological answer. So this lane would need to be fixed up.
6744     bool TautologicalInvertedLane = D.ule(Cmp);
6745     HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6746 
6747     // If all lanes are tautological (either all divisors are ones, or divisor
6748     // is not greater than the constant we are comparing with),
6749     // we will prefer to avoid the fold.
6750     bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6751     HadTautologicalLanes |= TautologicalLane;
6752     AllLanesAreTautological &= TautologicalLane;
6753 
6754     // If we are comparing with non-zero, we need'll need  to subtract said
6755     // comparison value from the LHS. But there is no point in doing that if
6756     // every lane where we are comparing with non-zero is tautological..
6757     if (!Cmp.isZero())
6758       AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6759 
6760     // Decompose D into D0 * 2^K
6761     unsigned K = D.countr_zero();
6762     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6763     APInt D0 = D.lshr(K);
6764 
6765     // D is even if it has trailing zeros.
6766     HadEvenDivisor |= (K != 0);
6767     // D is a power-of-two if D0 is one.
6768     // If all divisors are power-of-two, we will prefer to avoid the fold.
6769     AllDivisorsArePowerOfTwo &= D0.isOne();
6770 
6771     // P = inv(D0, 2^W)
6772     // 2^W requires W + 1 bits, so we have to extend and then truncate.
6773     unsigned W = D.getBitWidth();
6774     APInt P = D0.multiplicativeInverse();
6775     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6776 
6777     // Q = floor((2^W - 1) u/ D)
6778     // R = ((2^W - 1) u% D)
6779     APInt Q, R;
6780     APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
6781 
6782     // If we are comparing with zero, then that comparison constant is okay,
6783     // else it may need to be one less than that.
6784     if (Cmp.ugt(R))
6785       Q -= 1;
6786 
6787     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6788            "We are expecting that K is always less than all-ones for ShSVT");
6789 
6790     // If the lane is tautological the result can be constant-folded.
6791     if (TautologicalLane) {
6792       // Set P and K amount to a bogus values so we can try to splat them.
6793       P = 0;
6794       K = -1;
6795       // And ensure that comparison constant is tautological,
6796       // it will always compare true/false.
6797       Q = -1;
6798     }
6799 
6800     PAmts.push_back(DAG.getConstant(P, DL, SVT));
6801     KAmts.push_back(
6802         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
6803                               /*implicitTrunc=*/true),
6804                         DL, ShSVT));
6805     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6806     return true;
6807   };
6808 
6809   SDValue N = REMNode.getOperand(0);
6810   SDValue D = REMNode.getOperand(1);
6811 
6812   // Collect the values from each element.
6813   if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6814     return SDValue();
6815 
6816   // If all lanes are tautological, the result can be constant-folded.
6817   if (AllLanesAreTautological)
6818     return SDValue();
6819 
6820   // If this is a urem by a powers-of-two, avoid the fold since it can be
6821   // best implemented as a bit test.
6822   if (AllDivisorsArePowerOfTwo)
6823     return SDValue();
6824 
6825   SDValue PVal, KVal, QVal;
6826   if (D.getOpcode() == ISD::BUILD_VECTOR) {
6827     if (HadTautologicalLanes) {
6828       // Try to turn PAmts into a splat, since we don't care about the values
6829       // that are currently '0'. If we can't, just keep '0'`s.
6830       turnVectorIntoSplatVector(PAmts, isNullConstant);
6831       // Try to turn KAmts into a splat, since we don't care about the values
6832       // that are currently '-1'. If we can't, change them to '0'`s.
6833       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6834                                 DAG.getConstant(0, DL, ShSVT));
6835     }
6836 
6837     PVal = DAG.getBuildVector(VT, DL, PAmts);
6838     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6839     QVal = DAG.getBuildVector(VT, DL, QAmts);
6840   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6841     assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6842            "Expected matchBinaryPredicate to return one element for "
6843            "SPLAT_VECTORs");
6844     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6845     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6846     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6847   } else {
6848     PVal = PAmts[0];
6849     KVal = KAmts[0];
6850     QVal = QAmts[0];
6851   }
6852 
6853   if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6854     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6855       return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6856     assert(CompTargetNode.getValueType() == N.getValueType() &&
6857            "Expecting that the types on LHS and RHS of comparisons match.");
6858     N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6859   }
6860 
6861   // (mul N, P)
6862   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6863   Created.push_back(Op0.getNode());
6864 
6865   // Rotate right only if any divisor was even. We avoid rotates for all-odd
6866   // divisors as a performance improvement, since rotating by 0 is a no-op.
6867   if (HadEvenDivisor) {
6868     // We need ROTR to do this.
6869     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6870       return SDValue();
6871     // UREM: (rotr (mul N, P), K)
6872     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6873     Created.push_back(Op0.getNode());
6874   }
6875 
6876   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6877   SDValue NewCC =
6878       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6879                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6880   if (!HadTautologicalInvertedLanes)
6881     return NewCC;
6882 
6883   // If any lanes previously compared always-false, the NewCC will give
6884   // always-true result for them, so we need to fixup those lanes.
6885   // Or the other way around for inequality predicate.
6886   assert(VT.isVector() && "Can/should only get here for vectors.");
6887   Created.push_back(NewCC.getNode());
6888 
6889   // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6890   // if C2 is not less than C1, the comparison is always false.
6891   // But we have produced the comparison that will give the
6892   // opposive tautological answer. So these lanes would need to be fixed up.
6893   SDValue TautologicalInvertedChannels =
6894       DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6895   Created.push_back(TautologicalInvertedChannels.getNode());
6896 
6897   // NOTE: we avoid letting illegal types through even if we're before legalize
6898   // ops – legalization has a hard time producing good code for this.
6899   if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6900     // If we have a vector select, let's replace the comparison results in the
6901     // affected lanes with the correct tautological result.
6902     SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6903                                               DL, SETCCVT, SETCCVT);
6904     return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6905                        Replacement, NewCC);
6906   }
6907 
6908   // Else, we can just invert the comparison result in the appropriate lanes.
6909   //
6910   // NOTE: see the note above VSELECT above.
6911   if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6912     return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6913                        TautologicalInvertedChannels);
6914 
6915   return SDValue(); // Don't know how to lower.
6916 }
6917 
6918 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6919 /// where the divisor is constant and the comparison target is zero,
6920 /// return a DAG expression that will generate the same comparison result
6921 /// using only multiplications, additions and shifts/rotations.
6922 /// Ref: "Hacker's Delight" 10-17.
6923 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6924                                         SDValue CompTargetNode,
6925                                         ISD::CondCode Cond,
6926                                         DAGCombinerInfo &DCI,
6927                                         const SDLoc &DL) const {
6928   SmallVector<SDNode *, 7> Built;
6929   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6930                                          DCI, DL, Built)) {
6931     assert(Built.size() <= 7 && "Max size prediction failed.");
6932     for (SDNode *N : Built)
6933       DCI.AddToWorklist(N);
6934     return Folded;
6935   }
6936 
6937   return SDValue();
6938 }
6939 
6940 SDValue
6941 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6942                                   SDValue CompTargetNode, ISD::CondCode Cond,
6943                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6944                                   SmallVectorImpl<SDNode *> &Created) const {
6945   // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6946   // Fold:
6947   //   (seteq/ne (srem N, D), 0)
6948   // To:
6949   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
6950   //
6951   // - D must be constant, with D = D0 * 2^K where D0 is odd
6952   // - P is the multiplicative inverse of D0 modulo 2^W
6953   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6954   // - Q = floor((2 * A) / (2^K))
6955   // where W is the width of the common type of N and D.
6956   //
6957   // When D is a power of two (and thus D0 is 1), the normal
6958   // formula for A and Q don't apply, because the derivation
6959   // depends on D not dividing 2^(W-1), and thus theorem ZRS
6960   // does not apply. This specifically fails when N = INT_MIN.
6961   //
6962   // Instead, for power-of-two D, we use:
6963   // - A = 2^(W-1)
6964   // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6965   // - Q = 2^(W-K) - 1
6966   // |-> Test that the top K bits are zero after rotation
6967   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6968          "Only applicable for (in)equality comparisons.");
6969 
6970   SelectionDAG &DAG = DCI.DAG;
6971 
6972   EVT VT = REMNode.getValueType();
6973   EVT SVT = VT.getScalarType();
6974   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6975   EVT ShSVT = ShVT.getScalarType();
6976 
6977   // If we are after ops legalization, and MUL is unavailable, we can not
6978   // proceed.
6979   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6980     return SDValue();
6981 
6982   // TODO: Could support comparing with non-zero too.
6983   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6984   if (!CompTarget || !CompTarget->isZero())
6985     return SDValue();
6986 
6987   bool HadIntMinDivisor = false;
6988   bool HadOneDivisor = false;
6989   bool AllDivisorsAreOnes = true;
6990   bool HadEvenDivisor = false;
6991   bool NeedToApplyOffset = false;
6992   bool AllDivisorsArePowerOfTwo = true;
6993   SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6994 
6995   auto BuildSREMPattern = [&](ConstantSDNode *C) {
6996     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6997     if (C->isZero())
6998       return false;
6999 
7000     // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7001 
7002     // WARNING: this fold is only valid for positive divisors!
7003     APInt D = C->getAPIntValue();
7004     if (D.isNegative())
7005       D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`
7006 
7007     HadIntMinDivisor |= D.isMinSignedValue();
7008 
7009     // If all divisors are ones, we will prefer to avoid the fold.
7010     HadOneDivisor |= D.isOne();
7011     AllDivisorsAreOnes &= D.isOne();
7012 
7013     // Decompose D into D0 * 2^K
7014     unsigned K = D.countr_zero();
7015     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7016     APInt D0 = D.lshr(K);
7017 
7018     if (!D.isMinSignedValue()) {
7019       // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7020       // we don't care about this lane in this fold, we'll special-handle it.
7021       HadEvenDivisor |= (K != 0);
7022     }
7023 
7024     // D is a power-of-two if D0 is one. This includes INT_MIN.
7025     // If all divisors are power-of-two, we will prefer to avoid the fold.
7026     AllDivisorsArePowerOfTwo &= D0.isOne();
7027 
7028     // P = inv(D0, 2^W)
7029     // 2^W requires W + 1 bits, so we have to extend and then truncate.
7030     unsigned W = D.getBitWidth();
7031     APInt P = D0.multiplicativeInverse();
7032     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7033 
7034     // A = floor((2^(W - 1) - 1) / D0) & -2^K
7035     APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7036     A.clearLowBits(K);
7037 
7038     if (!D.isMinSignedValue()) {
7039       // If divisor INT_MIN, then we don't care about this lane in this fold,
7040       // we'll special-handle it.
7041       NeedToApplyOffset |= A != 0;
7042     }
7043 
7044     // Q = floor((2 * A) / (2^K))
7045     APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7046 
7047     assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7048            "We are expecting that A is always less than all-ones for SVT");
7049     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7050            "We are expecting that K is always less than all-ones for ShSVT");
7051 
7052     // If D was a power of two, apply the alternate constant derivation.
7053     if (D0.isOne()) {
7054       // A = 2^(W-1)
7055       A = APInt::getSignedMinValue(W);
7056       // - Q = 2^(W-K) - 1
7057       Q = APInt::getAllOnes(W - K).zext(W);
7058     }
7059 
7060     // If the divisor is 1 the result can be constant-folded. Likewise, we
7061     // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7062     if (D.isOne()) {
7063       // Set P, A and K to a bogus values so we can try to splat them.
7064       P = 0;
7065       A = -1;
7066       K = -1;
7067 
7068       // x ?% 1 == 0  <-->  true  <-->  x u<= -1
7069       Q = -1;
7070     }
7071 
7072     PAmts.push_back(DAG.getConstant(P, DL, SVT));
7073     AAmts.push_back(DAG.getConstant(A, DL, SVT));
7074     KAmts.push_back(
7075         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7076                               /*implicitTrunc=*/true),
7077                         DL, ShSVT));
7078     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7079     return true;
7080   };
7081 
7082   SDValue N = REMNode.getOperand(0);
7083   SDValue D = REMNode.getOperand(1);
7084 
7085   // Collect the values from each element.
7086   if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7087     return SDValue();
7088 
7089   // If this is a srem by a one, avoid the fold since it can be constant-folded.
7090   if (AllDivisorsAreOnes)
7091     return SDValue();
7092 
7093   // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7094   // since it can be best implemented as a bit test.
7095   if (AllDivisorsArePowerOfTwo)
7096     return SDValue();
7097 
7098   SDValue PVal, AVal, KVal, QVal;
7099   if (D.getOpcode() == ISD::BUILD_VECTOR) {
7100     if (HadOneDivisor) {
7101       // Try to turn PAmts into a splat, since we don't care about the values
7102       // that are currently '0'. If we can't, just keep '0'`s.
7103       turnVectorIntoSplatVector(PAmts, isNullConstant);
7104       // Try to turn AAmts into a splat, since we don't care about the
7105       // values that are currently '-1'. If we can't, change them to '0'`s.
7106       turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7107                                 DAG.getConstant(0, DL, SVT));
7108       // Try to turn KAmts into a splat, since we don't care about the values
7109       // that are currently '-1'. If we can't, change them to '0'`s.
7110       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7111                                 DAG.getConstant(0, DL, ShSVT));
7112     }
7113 
7114     PVal = DAG.getBuildVector(VT, DL, PAmts);
7115     AVal = DAG.getBuildVector(VT, DL, AAmts);
7116     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7117     QVal = DAG.getBuildVector(VT, DL, QAmts);
7118   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7119     assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7120            QAmts.size() == 1 &&
7121            "Expected matchUnaryPredicate to return one element for scalable "
7122            "vectors");
7123     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7124     AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7125     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7126     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7127   } else {
7128     assert(isa<ConstantSDNode>(D) && "Expected a constant");
7129     PVal = PAmts[0];
7130     AVal = AAmts[0];
7131     KVal = KAmts[0];
7132     QVal = QAmts[0];
7133   }
7134 
7135   // (mul N, P)
7136   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7137   Created.push_back(Op0.getNode());
7138 
7139   if (NeedToApplyOffset) {
7140     // We need ADD to do this.
7141     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7142       return SDValue();
7143 
7144     // (add (mul N, P), A)
7145     Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7146     Created.push_back(Op0.getNode());
7147   }
7148 
7149   // Rotate right only if any divisor was even. We avoid rotates for all-odd
7150   // divisors as a performance improvement, since rotating by 0 is a no-op.
7151   if (HadEvenDivisor) {
7152     // We need ROTR to do this.
7153     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7154       return SDValue();
7155     // SREM: (rotr (add (mul N, P), A), K)
7156     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7157     Created.push_back(Op0.getNode());
7158   }
7159 
7160   // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7161   SDValue Fold =
7162       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7163                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7164 
7165   // If we didn't have lanes with INT_MIN divisor, then we're done.
7166   if (!HadIntMinDivisor)
7167     return Fold;
7168 
7169   // That fold is only valid for positive divisors. Which effectively means,
7170   // it is invalid for INT_MIN divisors. So if we have such a lane,
7171   // we must fix-up results for said lanes.
7172   assert(VT.isVector() && "Can/should only get here for vectors.");
7173 
7174   // NOTE: we avoid letting illegal types through even if we're before legalize
7175   // ops – legalization has a hard time producing good code for the code that
7176   // follows.
7177   if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7178       !isOperationLegalOrCustom(ISD::AND, VT) ||
7179       !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7180       !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7181     return SDValue();
7182 
7183   Created.push_back(Fold.getNode());
7184 
7185   SDValue IntMin = DAG.getConstant(
7186       APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7187   SDValue IntMax = DAG.getConstant(
7188       APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7189   SDValue Zero =
7190       DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7191 
7192   // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7193   SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7194   Created.push_back(DivisorIsIntMin.getNode());
7195 
7196   // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
7197   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7198   Created.push_back(Masked.getNode());
7199   SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7200   Created.push_back(MaskedIsZero.getNode());
7201 
7202   // To produce final result we need to blend 2 vectors: 'SetCC' and
7203   // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7204   // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7205   // constant-folded, select can get lowered to a shuffle with constant mask.
7206   SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7207                                 MaskedIsZero, Fold);
7208 
7209   return Blended;
7210 }
7211 
7212 bool TargetLowering::
7213 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7214   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7215     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7216                                 "be a constant integer");
7217     return true;
7218   }
7219 
7220   return false;
7221 }
7222 
7223 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7224                                          const DenormalMode &Mode) const {
7225   SDLoc DL(Op);
7226   EVT VT = Op.getValueType();
7227   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7228   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7229 
7230   // This is specifically a check for the handling of denormal inputs, not the
7231   // result.
7232   if (Mode.Input == DenormalMode::PreserveSign ||
7233       Mode.Input == DenormalMode::PositiveZero) {
7234     // Test = X == 0.0
7235     return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7236   }
7237 
7238   // Testing it with denormal inputs to avoid wrong estimate.
7239   //
7240   // Test = fabs(X) < SmallestNormal
7241   const fltSemantics &FltSem = VT.getFltSemantics();
7242   APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7243   SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7244   SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7245   return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7246 }
7247 
/// Return a negated form of \p Op built without an explicit FNEG node, or an
/// empty SDValue if no such form exists. \p Cost reports whether the negated
/// form is cheaper than, the same as, or more expensive than an FNEG would
/// be. \p LegalOps restricts newly created operations to legal/custom ones;
/// \p OptForSize is forwarded to FP-immediate legality queries; \p Depth
/// bounds the recursion.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Drop a speculatively-built negated operand that ended up unused, so we
  // don't leave dead nodes in the DAG.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    // Legal if constants are natively legal, or every (non-undef) negated
    // element is an acceptable FP immediate.
    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Rebuild the vector with every defined element sign-flipped.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // -(X + Y) is only (−X) − Y / (−Y) − X when signed zeros can be ignored.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    // Both candidate folds below negate Z, so Z must be negatable at all.
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if we fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes through these unary ops: fneg(op X) == op(fneg X).
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // FP_ROUND carries a second (truncation-kind) operand that must be kept.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7551 
7552 //===----------------------------------------------------------------------===//
7553 // Legalization Utilities
7554 //===----------------------------------------------------------------------===//
7555 
/// Expand a MUL, UMUL_LOHI, or SMUL_LOHI of type \p VT into operations on the
/// half-width type \p HiLoVT, appending the half-width pieces of the result to
/// \p Result (two pieces for MUL, four for the *_LOHI opcodes). \p LL/\p LH/
/// \p RL/\p RH may supply pre-split halves of \p LHS/\p RHS — either all four
/// are set or none. Returns false when the target lacks the required
/// half-width multiply (per \p Kind) or the needed split/shift operations.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which half-width multiply forms may we emit? With
  // MulExpansionKind::Always everything is allowed regardless of legality.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a half-width multiply of L and R producing low and high halves,
  // preferring a single *MUL_LOHI node over a MUL + MULH pair.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Derive the low halves by truncation when they weren't supplied.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // Upper half of the full *_LOHI result is known zero.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  // Derive the high halves via shift + truncate when they weren't supplied.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Low part of the product: LL * RL.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // For a plain MUL only the low VT bits are needed, so the cross terms
    // contribute just their low halves to the upper result half.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second cross term can overflow; propagate the carry via the
  // glued ADDC/ADDE pair when available, otherwise via UADDO_CARRY.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // Highest partial product: LH * RH (signed only for SMUL_LOHI).
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Correct the unsigned partial products for negative operand halves: if a
    // high half is negative, subtract the other operand's (zero-extended) low
    // half from the top of the result.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
7730 
7731 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7732                                SelectionDAG &DAG, MulExpansionKind Kind,
7733                                SDValue LL, SDValue LH, SDValue RL,
7734                                SDValue RH) const {
7735   SmallVector<SDValue, 2> Result;
7736   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7737                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7738                            DAG, Kind, LL, LH, RL, RH);
7739   if (Ok) {
7740     assert(Result.size() == 2);
7741     Lo = Result[0];
7742     Hi = Result[1];
7743   }
7744   return Ok;
7745 }
7746 
// Optimize unsigned division or remainder by constants for types twice as large
// as a legal VT.
//
// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
// can be computed
// as:
//   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
//   Remainder = Sum % Constant
// This is based on "Remainder by Summing Digits" from Hacker's Delight.
//
// For division, we can compute the remainder using the algorithm described
// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
// (1 << (BitWidth / 2)) to get the quotient.

// If Constant is even, we can shift right the dividend and the divisor by the
// number of trailing zeros in Constant before applying the remainder algorithm.
// If we're after the quotient, we can subtract this value from the shifted
// dividend and multiply by the multiplicative inverse of the shifted divisor.
// If we want the remainder, we shift the value left by the number of trailing
// zeros and add the bits that were shifted out of the dividend.
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                            SmallVectorImpl<SDValue> &Result,
                                            EVT HiLoVT, SelectionDAG &DAG,
                                            SDValue LL, SDValue LH) const {
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);

  // TODO: Support signed division/remainder.
  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
    return false;
  assert(
      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
      "Unexpected opcode");

  // Only constant divisors can use this expansion.
  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CN)
    return false;

  APInt Divisor = CN->getAPIntValue();
  unsigned BitWidth = Divisor.getBitWidth();
  unsigned HBitWidth = BitWidth / 2;
  assert(VT.getScalarSizeInBits() == BitWidth &&
         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");

  // Divisor needs to be less than (1 << HBitWidth).
  APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
  if (Divisor.uge(HalfMaxPlus1))
    return false;

  // We depend on the UREM by constant optimization in DAGCombiner that requires
  // high multiply.
  if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
      !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
    return false;

  // Don't expand if optimizing for size.
  if (DAG.shouldOptForSize())
    return false;

  // Early out for 0 or 1 divisors.
  if (Divisor.ule(1))
    return false;

  // If the divisor is even, shift it until it becomes odd.
  unsigned TrailingZeros = 0;
  if (!Divisor[0]) {
    TrailingZeros = Divisor.countr_zero();
    Divisor.lshrInPlace(TrailingZeros);
  }

  SDLoc dl(N);
  SDValue Sum;
  SDValue PartialRem;

  // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
  // then add in the carry.
  // TODO: If we can't split it in half, we might be able to split into 3 or
  // more pieces using a smaller bit width.
  if (HalfMaxPlus1.urem(Divisor).isOne()) {
    assert(!LL == !LH && "Expected both input halves or no input halves!");
    if (!LL)
      std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);

    // Shift the input by the number of TrailingZeros in the divisor. The
    // shifted out bits will be added to the remainder later.
    if (TrailingZeros) {
      // Save the shifted off bits if we need the remainder.
      if (Opcode != ISD::UDIV) {
        APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
        PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
                                 DAG.getConstant(Mask, dl, HiLoVT));
      }

      // Funnel-shift: new LL takes its high bits from the low bits of LH.
      LL = DAG.getNode(
          ISD::OR, dl, HiLoVT,
          DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
                      DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
          DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
                      DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
                                                 HiLoVT, dl)));
      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
    }

    // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
    EVT SetCCType =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
    if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
      SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
      Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
      Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
                        DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
    } else {
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
      // Unsigned overflow occurred iff the sum wrapped below an addend.
      SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
      // If the boolean for the target is 0 or 1, we can add the setcc result
      // directly.
      if (getBooleanContents(HiLoVT) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
        Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
      else
        Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
                              DAG.getConstant(0, dl, HiLoVT));
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
    }
  }

  // If we didn't find a sum, we can't do the expansion.
  if (!Sum)
    return false;

  // Perform a HiLoVT urem on the Sum using truncated divisor.
  SDValue RemL =
      DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
                  DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
  SDValue RemH = DAG.getConstant(0, dl, HiLoVT);

  if (Opcode != ISD::UREM) {
    // Subtract the remainder from the shifted dividend.
    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);

    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);

    // Multiply by the multiplicative inverse of the divisor modulo
    // (1 << BitWidth).
    APInt MulFactor = Divisor.multiplicativeInverse();

    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
                                   DAG.getConstant(MulFactor, dl, VT));

    // Split the quotient into low and high parts.
    SDValue QuotL, QuotH;
    std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
    Result.push_back(QuotL);
    Result.push_back(QuotH);
  }

  if (Opcode != ISD::UDIV) {
    // If we shifted the input, shift the remainder left and add the bits we
    // shifted off the input.
    if (TrailingZeros) {
      APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
      RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
    }
    // High half of the remainder is always zero (divisor < 2^HBitWidth).
    Result.push_back(RemL);
    Result.push_back(DAG.getConstant(0, dl, HiLoVT));
  }

  return true;
}
7921 
7922 // Check that (every element of) Z is undef or not an exact multiple of BW.
7923 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7924   return ISD::matchUnaryPredicate(
7925       Z,
7926       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7927       true);
7928 }
7929 
/// Expand a vector-predicated funnel shift (ISD::VP_FSHL / ISD::VP_FSHR) into
/// predicated shift, mask and OR nodes, threading the mask and explicit
/// vector length (EVL) operands through every node created.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;          // The two shifted contributions OR'd at the end.
  SDValue ShAmt, InvShAmt;   // Effective shift amount and its complement.
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);    // Shift amount.
  SDValue Mask = Node->getOperand(3); // Per-lane predicate.
  SDValue VL = Node->getOperand(4);   // Explicit vector length.

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    // Since C is known non-zero, both sub-shift amounts stay in [1, BW-1],
    // so neither individual shift can be out of range.
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    // The extra shift-by-one keeps the second shift amount within
    // [0, BW-1] even when Z % BW == 0, avoiding an out-of-range shift.
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      // Non-power-of-two width: need a real remainder and subtraction.
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  // Combine the two shifted halves.
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
7986 
7987 SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7988                                           SelectionDAG &DAG) const {
7989   if (Node->isVPOpcode())
7990     return expandVPFunnelShift(Node, DAG);
7991 
7992   EVT VT = Node->getValueType(0);
7993 
7994   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7995                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
7996                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
7997                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
7998     return SDValue();
7999 
8000   SDValue X = Node->getOperand(0);
8001   SDValue Y = Node->getOperand(1);
8002   SDValue Z = Node->getOperand(2);
8003 
8004   unsigned BW = VT.getScalarSizeInBits();
8005   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8006   SDLoc DL(SDValue(Node, 0));
8007 
8008   EVT ShVT = Z.getValueType();
8009 
8010   // If a funnel shift in the other direction is more supported, use it.
8011   unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8012   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8013       isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8014     if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8015       // fshl X, Y, Z -> fshr X, Y, -Z
8016       // fshr X, Y, Z -> fshl X, Y, -Z
8017       SDValue Zero = DAG.getConstant(0, DL, ShVT);
8018       Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8019     } else {
8020       // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8021       // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8022       SDValue One = DAG.getConstant(1, DL, ShVT);
8023       if (IsFSHL) {
8024         Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8025         X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8026       } else {
8027         X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8028         Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8029       }
8030       Z = DAG.getNOT(DL, Z, ShVT);
8031     }
8032     return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8033   }
8034 
8035   SDValue ShX, ShY;
8036   SDValue ShAmt, InvShAmt;
8037   if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8038     // fshl: X << C | Y >> (BW - C)
8039     // fshr: X << (BW - C) | Y >> C
8040     // where C = Z % BW is not zero
8041     SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8042     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8043     InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8044     ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8045     ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8046   } else {
8047     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8048     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8049     SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8050     if (isPowerOf2_32(BW)) {
8051       // Z % BW -> Z & (BW - 1)
8052       ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8053       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8054       InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8055     } else {
8056       SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8057       ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8058       InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8059     }
8060 
8061     SDValue One = DAG.getConstant(1, DL, ShVT);
8062     if (IsFSHL) {
8063       ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8064       SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8065       ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8066     } else {
8067       SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8068       ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8069       ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8070     }
8071   }
8072   return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8073 }
8074 
8075 // TODO: Merge with expandFunnelShift.
8076 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8077                                   SelectionDAG &DAG) const {
8078   EVT VT = Node->getValueType(0);
8079   unsigned EltSizeInBits = VT.getScalarSizeInBits();
8080   bool IsLeft = Node->getOpcode() == ISD::ROTL;
8081   SDValue Op0 = Node->getOperand(0);
8082   SDValue Op1 = Node->getOperand(1);
8083   SDLoc DL(SDValue(Node, 0));
8084 
8085   EVT ShVT = Op1.getValueType();
8086   SDValue Zero = DAG.getConstant(0, DL, ShVT);
8087 
8088   // If a rotate in the other direction is more supported, use it.
8089   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8090   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8091       isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8092     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8093     return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8094   }
8095 
8096   if (!AllowVectorOps && VT.isVector() &&
8097       (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8098        !isOperationLegalOrCustom(ISD::SRL, VT) ||
8099        !isOperationLegalOrCustom(ISD::SUB, VT) ||
8100        !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8101        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8102     return SDValue();
8103 
8104   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8105   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8106   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8107   SDValue ShVal;
8108   SDValue HsVal;
8109   if (isPowerOf2_32(EltSizeInBits)) {
8110     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8111     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8112     SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8113     SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8114     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8115     SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8116     HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8117   } else {
8118     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8119     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8120     SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8121     SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8122     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8123     SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8124     SDValue One = DAG.getConstant(1, DL, ShVT);
8125     HsVal =
8126         DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8127   }
8128   return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8129 }
8130 
/// Expand a multi-part shift (SHL_PARTS / SRL_PARTS / SRA_PARTS) of a value
/// split into {Lo, Hi} halves into FSHL/FSHR plus selects that handle shift
/// amounts greater than or equal to the part width.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0); // Low part of the input.
  SDValue ShOpHi = Node->getOperand(1); // High part of the input.
  SDValue ShAmt = Node->getOperand(2);  // Shift amount.
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 is the value that fills the "vacated" part for large shift amounts:
  // the sign-fill of the high part for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2: the part that receives bits funneled across the part boundary.
  // Tmp3: the part shifted entirely within itself.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  // Cond is true when the shift amount has the VTBits bit set, i.e. the
  // effective amount is >= the part width.
  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8182 
/// Expand FP_TO_SINT by bit-twiddling the IEEE-754 representation of the
/// source; currently only handles non-strict f32 -> i64 and returns false
/// (no expansion performed) otherwise.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field masks and offsets.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as an integer so the fields can be picked apart.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent: ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign: arithmetic-shift the sign bit down so Sign is all-ones for a
  // negative input and all-zeros otherwise, widened to the destination type.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Mantissa with the implicit leading one restored (bit 23).
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the mantissa by the exponent: shift left when the exponent exceeds
  // the mantissa width (23), otherwise shift right to drop fractional bits.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones and is
  // a no-op when Sign is zero.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, which truncates to zero.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8253 
/// Expand FP_TO_UINT in terms of FP_TO_SINT: inputs below the destination's
/// signmask convert directly, larger inputs are offset by the signmask before
/// converting and the result is fixed up afterwards. Returns false if the
/// expansion does not apply; on success sets Result (and Chain for strict
/// nodes).
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst is the destination signmask (2^(DstBits-1)) as a float, computed by
  // the convertFromAPInt above.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  // Sel = Src < 2^(DstBits-1). Strict nodes use a signaling compare and
  // thread the chain through it.
  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // XOR with the signmask is equivalent to adding it here.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8355 
/// Expand (non-strict) UINT_TO_FP for i64 -> f64 by splicing the 32-bit
/// halves of the input into the mantissas of appropriately scaled doubles
/// and combining them with FSUB/FADD. Returns false if the expansion does
/// not apply.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  // If the input is known to be non-negative and SINT_TO_FP is legal then use
  // it.
  if (Node->getFlags().hasNonNeg() &&
      isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
    Result =
        DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
    return true;
  }

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit
  // operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt.  This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will
  // produce -0.0. This will be added to +0.0 and produce -0.0 which is
  // incorrect.
  // 0x4330... is the f64 bit pattern of 2^52, 0x4530... that of 2^84;
  // OR'ing a 32-bit payload into such a pattern yields an exact double.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  // Split the input into 32-bit halves and embed each half into the mantissa
  // of a scaled double via integer OR + bitcast.
  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  // Subtract the combined biases from the high half, then add the low half;
  // this final FADD is the only step that can round.
  SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
8415 
8416 SDValue
8417 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8418                                                SelectionDAG &DAG) const {
8419   unsigned Opcode = Node->getOpcode();
8420   assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8421           Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8422          "Wrong opcode");
8423 
8424   if (Node->getFlags().hasNoNaNs()) {
8425     ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8426     EVT VT = Node->getValueType(0);
8427     if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8428          !isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
8429         VT.isVector())
8430       return SDValue();
8431     SDValue Op1 = Node->getOperand(0);
8432     SDValue Op2 = Node->getOperand(1);
8433     SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8434     // Copy FMF flags, but always set the no-signed-zeros flag
8435     // as this is implied by the FMINNUM/FMAXNUM semantics.
8436     SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
8437     return SelCC;
8438   }
8439 
8440   return SDValue();
8441 }
8442 
/// Expand FMINNUM/FMAXNUM using the best supported equivalent: the IEEE
/// variants (with sNaN quieting), FMINIMUM/FMAXIMUM when NaN/zero concerns
/// allow, or a compare+select as a last resort. Returns an empty SDValue when
/// no expansion applies.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(0);
  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  // Prefer the IEEE variants when available, quieting sNaN inputs first.
  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT))
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
  }

  // Last resort: compare+select (only produced when no-NaNs is known).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8498 
/// Expand FMINIMUM/FMAXIMUM (NaN-propagating semantics, with -0.0 ordered
/// before +0.0) using the best available min/max or compare+select, then
/// patch up NaN propagation and signed-zero ordering as required.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
  } else {
    // No usable min/max operation: scalarize fixed-width vectors when
    // VSELECT isn't available, otherwise fall back to setcc+select.
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
                                        APFloat::getNaN(VT.getFltSemantics()));
    // MinMax = isunordered(LHS, RHS) ? NaN : MinMax
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
    // When the result compares equal to 0.0 the operands may have been
    // {-0.0, +0.0}; use IS_FPCLASS to pick the operand whose zero has the
    // required sign (+0 for max, -0 for min).
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
  }

  return MinMax;
}
8565 
/// Expand fminimum_num/fmaximum_num (IEEE 754-2019 minimumNumber /
/// maximumNumber: if exactly one operand is NaN the other operand is
/// returned, and -0.0 is treated as less than +0.0) in terms of whatever
/// min/max-like operations the target supports, falling back to an explicit
/// compare+select sequence.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = Node->getFlags();

  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  // FMINNUM_IEEE/FMAXNUM_IEEE match the required semantics except for
  // possibly-signaling NaN inputs, which are quieted first.
  if (isOperationLegalOrCustom(NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(LHS)) {
        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(RHS)) {
        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
      }
    }

    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
  // same behaviors for all of other cases: +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2019Op, VT))
      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
  }

  // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
       DAG.isKnownNeverZeroFloat(RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(IEEE2008Op, VT))
      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
  }

  // The compare+select expansion below needs vector selects; otherwise
  // expand one element at a time.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // If only one operand is NaN, override it with another operand.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
  }

  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
  // If MinMax is NaN, let's quiet it.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
      !DAG.isKnownNeverNaN(RHS)) {
    MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
  }

  // Fixup signed zero behavior.
  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
    return MinMax;
  }
  // If the result compares equal to zero, the operands may have been +0.0 and
  // -0.0, in which case the compare above could have picked either; select the
  // operand of the required sign (-0.0 for min, +0.0 for max) explicitly.
  SDValue TestZero =
      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
  SDValue LCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
      MinMax, Flags);
  SDValue RCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
      Flags);
  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
}
8653 
/// Returns a true value if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
                                           const fltSemantics &Semantics,
                                           const MachineFunction &MF) {
  // Split the requested test into its non-nan and nan components.
  FPClassTest OrderedMask = Test & ~fcNan;
  FPClassTest NanTest = Test & fcNan;
  bool IsOrdered = NanTest == fcNone; // No nan bits requested.
  bool IsUnordered = NanTest == fcNan; // Both qnan and snan requested.

  // Skip cases that are testing for only a qnan or snan.
  if (!IsOrdered && !IsUnordered)
    return std::nullopt;

  // fcmp (o/u)eq 0 matches a pure zero test only if denormal inputs are not
  // flushed; otherwise denormals would also compare equal to 0.
  if (OrderedMask == fcZero &&
      MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
    return IsOrdered;
  // Conversely, if inputs are treated as zero the compare also matches
  // subnormals, so it implements the zero|subnormal test.
  if (OrderedMask == (fcZero | fcSubnormal) &&
      MF.getDenormalMode(Semantics).inputsAreZero())
    return IsOrdered;
  return std::nullopt;
}
8677 
/// Expand an ISD::IS_FPCLASS test of \p Op against \p OrigTestMask. First
/// tries to lower the test to floating-point compares (only valid when FP
/// exceptions can be ignored), then falls back to bit tests on the integer
/// representation of the value.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x86 f80 has an explicit integer bit in the significand, which needs
  // special handling throughout the integer path below.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    // Prefer testing the complement if it is a simpler mask; the final
    // compare's condition code is inverted to compensate.
    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(FPTestMask, true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    // Zero (possibly plus subnormal) tests can become a single compare
    // against 0.0, depending on the denormal mode (see isFCmpEqualZero).
    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                             : UnorderedCmpOpcode,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
        (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
         (OperandVT.isVector() &&
          isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
                                          : UnorderedCmpOpcode,
                                OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
          OperandVT);
      return DAG.getSetCC(DL, ResultVT, Op, Inf,
                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                  OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
                            IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(IsFiniteOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(IsNormalOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // Accumulates the OR of the per-class partial results into Res.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  // Lazily builds (and caches) the "f80 explicit integer bit is set" test.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No partial results means no requested class remained; the answer is a
  // constant (true iff we tested the inverted mask).
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
9025 
9026 // Only expand vector types if we have the appropriate vector bit operations.
9027 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9028   assert(VT.isVector() && "Expected vector type");
9029   unsigned Len = VT.getScalarSizeInBits();
9030   return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9031          TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9032          TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9033          (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9034          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9035 }
9036 
/// Expand CTPOP (population count) using the parallel bit-counting trick:
/// pairwise 2-bit sums, then 4-bit sums, then a byte mask, and finally a
/// multiply (or shift-add chain) to sum the per-byte counts.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // Each byte now holds its own popcount; for i8 that is already the answer.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                     DAG.getNode(ISD::ADD, dl, VT, Op,
                                 DAG.getNode(ISD::SRL, dl, VT, Op,
                                             DAG.getConstant(8, dl, ShVT))),
                     DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    // No usable multiply: sum the bytes with a log2(Len/8) shift-add chain
    // instead, accumulating the total into the top byte.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
9113 
9114 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9115   SDLoc dl(Node);
9116   EVT VT = Node->getValueType(0);
9117   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9118   SDValue Op = Node->getOperand(0);
9119   SDValue Mask = Node->getOperand(1);
9120   SDValue VL = Node->getOperand(2);
9121   unsigned Len = VT.getScalarSizeInBits();
9122   assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9123 
9124   // TODO: Add support for irregular type lengths.
9125   if (!(Len <= 128 && Len % 8 == 0))
9126     return SDValue();
9127 
9128   // This is same algorithm of expandCTPOP from
9129   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9130   SDValue Mask55 =
9131       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9132   SDValue Mask33 =
9133       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9134   SDValue Mask0F =
9135       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9136 
9137   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9138 
9139   // v = v - ((v >> 1) & 0x55555555...)
9140   Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9141                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9142                                  DAG.getConstant(1, dl, ShVT), Mask, VL),
9143                      Mask55, Mask, VL);
9144   Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9145 
9146   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9147   Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9148   Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9149                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9150                                  DAG.getConstant(2, dl, ShVT), Mask, VL),
9151                      Mask33, Mask, VL);
9152   Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9153 
9154   // v = (v + (v >> 4)) & 0x0F0F0F0F...
9155   Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9156                      Mask, VL),
9157   Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9158   Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9159 
9160   if (Len <= 8)
9161     return Op;
9162 
9163   // v = (v * 0x01010101...) >> (Len - 8)
9164   SDValue V;
9165   if (isOperationLegalOrCustomOrPromote(
9166           ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9167     SDValue Mask01 =
9168         DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9169     V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9170   } else {
9171     V = Op;
9172     for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9173       SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9174       V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9175                       DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9176                       Mask, VL);
9177     }
9178   }
9179   return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9180                      Mask, VL);
9181 }
9182 
/// Expand CTLZ/CTLZ_ZERO_UNDEF, preferring whichever CTLZ variant the target
/// already supports, and falling back to the smear-right-then-popcount trick.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // For a zero input CTLZ is defined to be the bit width.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  // After smearing, ~x has ones exactly in the leading-zero positions.
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
9232 
/// Expand VP_CTLZ via the smear-right-then-popcount trick, with every step
/// predicated on the VP mask and vector length.
SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue VL = Node->getOperand(2);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
                     DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
                     VL);
  }
  // NOT is expressed as XOR with all-ones; ~x has ones exactly in the
  // leading-zero positions.
  Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
                   Mask, VL);
  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
}
9259 
/// Expand CTTZ/CTTZ_ZERO_UNDEF via a de Bruijn multiply and a constant-pool
/// table lookup: (x & -x) isolates the lowest set bit, multiplying by a de
/// Bruijn constant and taking the top log2(BitWidth) bits yields a unique
/// table index per bit position. Only 32- and 64-bit widths are supported.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // Lookup = ((x & -x) * DeBruijn) >> ShiftAmt
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the table mapping each de Bruijn index back to its bit position.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  // ZERO_UNDEF doesn't need the zero-input fixup below.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // For a zero input CTTZ is defined to be the bit width.
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9303 
// Expand ISD::CTTZ / ISD::CTTZ_ZERO_UNDEF for targets lacking native
// support, trying progressively more generic lowerings: the sibling CTTZ
// flavor, a De Bruijn table lookup, and finally the popcount/ctlz
// identities from "Hacker's Delight".
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // Plain CTTZ must yield the element bit width for a zero input.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
      !isOperationLegal(ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
9359 
9360 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9361   SDValue Op = Node->getOperand(0);
9362   SDValue Mask = Node->getOperand(1);
9363   SDValue VL = Node->getOperand(2);
9364   SDLoc dl(Node);
9365   EVT VT = Node->getValueType(0);
9366 
9367   // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9368   SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9369                             DAG.getAllOnesConstant(dl, VT), Mask, VL);
9370   SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9371                                  DAG.getConstant(1, dl, VT), Mask, VL);
9372   SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9373   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9374 }
9375 
// Expand a VP count-trailing-zero-elements node: the result is the index of
// the first non-zero element of the source, or EVL when every element is
// zero, computed with the step-vector/select/umin-reduction sequence below.
SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
                                             SelectionDAG &DAG) const {
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
  SDLoc DL(N);
  SDValue Source = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(0);
  // Vector of result-typed elements for the step-vector/splat/select dance.
  EVT ResVecVT =
      EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());

  // Convert to boolean vector.
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
    SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                             SrcVT.getVectorElementCount());
    // Non-zero elements become true lanes.
    Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
                         DAG.getCondCode(ISD::SETNE), Mask, EVL);
  }

  // EVL as a scalar of the result type; it doubles as the "not found" value.
  SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
  SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
  // True lanes keep their index, false lanes become EVL; the minimum over
  // the vector (seeded with EVL) is the first true index, or EVL if none.
  SDValue Select =
      DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
  return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
}
9408 
9409 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9410                                   bool IsNegative) const {
9411   SDLoc dl(N);
9412   EVT VT = N->getValueType(0);
9413   SDValue Op = N->getOperand(0);
9414 
9415   // abs(x) -> smax(x,sub(0,x))
9416   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9417       isOperationLegal(ISD::SMAX, VT)) {
9418     SDValue Zero = DAG.getConstant(0, dl, VT);
9419     Op = DAG.getFreeze(Op);
9420     return DAG.getNode(ISD::SMAX, dl, VT, Op,
9421                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9422   }
9423 
9424   // abs(x) -> umin(x,sub(0,x))
9425   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9426       isOperationLegal(ISD::UMIN, VT)) {
9427     SDValue Zero = DAG.getConstant(0, dl, VT);
9428     Op = DAG.getFreeze(Op);
9429     return DAG.getNode(ISD::UMIN, dl, VT, Op,
9430                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9431   }
9432 
9433   // 0 - abs(x) -> smin(x, sub(0,x))
9434   if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9435       isOperationLegal(ISD::SMIN, VT)) {
9436     SDValue Zero = DAG.getConstant(0, dl, VT);
9437     Op = DAG.getFreeze(Op);
9438     return DAG.getNode(ISD::SMIN, dl, VT, Op,
9439                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9440   }
9441 
9442   // Only expand vector types if we have the appropriate vector operations.
9443   if (VT.isVector() &&
9444       (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9445        (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9446        (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9447        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9448     return SDValue();
9449 
9450   Op = DAG.getFreeze(Op);
9451   SDValue Shift = DAG.getNode(
9452       ISD::SRA, dl, VT, Op,
9453       DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9454   SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9455 
9456   // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9457   if (!IsNegative)
9458     return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9459 
9460   // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9461   return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9462 }
9463 
// Expand ISD::ABDS/ABDU (signed/unsigned absolute difference), trying
// progressively more generic lowerings:
//   sub(max,min) -> or(usubsat,usubsat) -> abs(sub) when the subtraction
//   provably cannot overflow -> branchless setcc forms -> select(cmp,a-b,b-a).
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // Freeze both operands so the repeated uses below observe one consistent
  // value each.
  SDValue LHS = DAG.getFreeze(N->getOperand(0));
  SDValue RHS = DAG.getFreeze(N->getOperand(1));
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
    return DAG.getNode(ISD::OR, dl, VT,
                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));

  // If the subtract doesn't overflow then just use abs(sub())
  // NOTE: don't use frozen operands for value tracking.
  bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
                       DAG.SignBitIsZero(N->getOperand(0));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
                             N->getOperand(1)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
                             N->getOperand(0)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));

  // cmp = lhs > rhs (signed or unsigned); shared by the expansions below.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
  }

  // Similar to the branchless expansion, use the (sign-extended) usubo overflow
  // flag if the (scalar) type is illegal as this is more likely to legalize
  // cleanly:
  // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue USubO =
        DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
    SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
}
9537 
// Expand the fixed-point averaging nodes AVGFLOORS/AVGFLOORU (round down)
// and AVGCEILS/AVGCEILU (round up) without letting the intermediate sum
// overflow.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // (A spare sign bit / known-zero top bit on both operands means LHS+RHS
  // cannot overflow, so the plain sum is safe.)
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
       DAG.ComputeNumSignBits(RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
    // Ceiling variants add 1 before the halving shift.
    if (!IsFloor)
      Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
    return DAG.getNode(ShiftOpc, dl, VT, Sum,
                       DAG.getShiftAmountConstant(1, VT, dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
    if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
      if (!IsFloor)
        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
                          DAG.getConstant(1, dl, ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
                        DAG.getShiftAmountConstant(1, ExtVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  // (The UADDO carry is the lost top bit of the sum; OR it back in as the
  // MSB of the shifted result.)
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue UAddWithOverflow =
        DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(0);
    SDValue Overflow = UAddWithOverflow.getValue(1);

    // Right shift the sum by 1
    SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
                                  DAG.getShiftAmountConstant(1, VT, dl));

    SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
    SDValue OverflowShl = DAG.getNode(
        ISD::SHL, dl, VT, ZeroExtOverflow,
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));

    return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Freeze the operands: each is used twice below.
  LHS = DAG.getFreeze(LHS);
  RHS = DAG.getFreeze(RHS);
  SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
  SDValue Shift =
      DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
  return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
}
9619 
// Expand ISD::BSWAP into shifts, masks and ORs. i16 uses a single rotate;
// i32/i64 move each byte into its mirrored position and OR the pieces back
// together. Other (or non-simple) scalar types are not handled here.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // Result = (Op << 24) | ((Op & 0xFF00) << 8) |
    //          ((Op >> 8) & 0xFF00) | (Op >> 24)
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Low four bytes are masked then shifted left; high four bytes are
    // shifted right then masked. The pieces are then ORed pairwise.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
9677 
// VP (vector-predicated) version of expandBSWAP: the same shift/mask/OR
// byte swap, with the mask and EVL operands threaded through every node.
// i16 uses an explicit shl/srl pair instead of a rotate.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Swap the two bytes: (Op << 8) | (Op >> 8).
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    // Result = (Op << 24) | ((Op & 0xFF00) << 8) |
    //          ((Op >> 8) & 0xFF00) | (Op >> 24)
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    // Low four bytes are masked then shifted left; high four bytes are
    // shifted right then masked. The pieces are then ORed pairwise.
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
9753 
// Expand ISD::BITREVERSE. For power-of-2 sizes of at least a byte: BSWAP
// the bytes, then swap the nibbles, bit-pairs and single bits within each
// byte using byte-splatted masks. Otherwise fall back to repositioning
// every bit individually.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Fallback: move each bit individually. Bit I of the input is shifted to
  // position J = Sz-1-I, isolated with a single-bit mask, and ORed into the
  // accumulated result.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    APInt Shift = APInt::getOneBitSet(Sz, J);
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
9814 
// VP (vector-predicated) version of expandBITREVERSE: VP_BSWAP followed by
// masked nibble/bit-pair/bit swaps. Unlike the non-VP expansion there is no
// bit-at-a-time fallback, so non-power-of-2 or sub-byte element sizes
// return SDValue().
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    return Tmp;
  }
  return SDValue();
}
9876 
// Lower a vector load by scalarizing it.
//
// For byte-sized elements this emits one (possibly extending) scalar load per
// element and rebuilds the vector with a BUILD_VECTOR. Elements that are not
// byte-sized cannot be loaded individually (vectors are stored in memory with
// no inter-element padding), so in that case the whole vector is loaded as a
// single wide integer and each element is extracted with SRL+AND+TRUNCATE.
//
// Returns {loaded value, new chain}.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();    // Type as it exists in memory.
  EVT DstVT = LD->getValueType(0);  // Type produced in registers.
  ISD::LoadExtType ExtType = LD->getExtensionType();

  // A scalable vector has an unknown element count, so this per-element
  // expansion cannot enumerate it.
  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    // Mask that keeps only the low SrcEltBits of a shifted-down element.
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 occupies the most significant bits of
      // the wide integer, so the shift index is reversed.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Apply the requested extension (sext/zext/aext) to reach DstEltVT.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    // The single wide load's chain is the only memory dependency.
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: load each element with its own scalar load.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge the per-element load chains into a single chain result.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
9966 
// Lower a vector store by scalarizing it.
//
// For byte-sized elements this extracts each element and emits one truncating
// scalar store per element. Elements that are not byte-sized are instead
// packed (zero-extended, shifted, OR'ed) into a single wide integer which is
// stored once, preserving the padding-free in-memory vector layout.
//
// Returns the new chain (a TokenFactor of the stores, or the single store).
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // A scalable vector has an unknown element count, so this per-element
  // expansion cannot enumerate it.
  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    // Accumulator the elements are OR'ed into.
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      // Truncate to the in-memory element width, then widen to the packed
      // integer so only the element's own bits are set.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The stores are independent; order them with a TokenFactor.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
10043 
// Expand an unaligned load for a target that cannot perform it natively.
//
// Strategy, in order of preference:
//  * FP/vector loads whose same-sized integer type is legal: do a misaligned
//    integer load and BITCAST (or scalarize an illegal vector load).
//  * Otherwise copy the bytes to an aligned stack slot with register-width
//    unaligned integer loads/stores, then reload from the slot with the
//    original extension.
//  * Plain integer loads: split into two half-width loads (the low half
//    zero-extended) and recombine with SHL+OR, honoring endianness.
//
// Returns {loaded value, new chain}.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the value (round up).
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low half is always zero-extended so its
  // top bits don't pollute the OR; the high half carries the requested
  // extension.  Which half sits at the base address depends on endianness.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts
  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
10193 
// Expand an unaligned store for a target that cannot perform it natively.
//
// Strategy, in order of preference:
//  * FP/vector stores whose same-sized integer type is legal: BITCAST and do
//    a misaligned integer store (or scalarize an illegal vector store).
//  * Otherwise spill the value to an aligned stack slot and copy it to the
//    destination with register-width unaligned integer loads/stores.
//  * Plain integer stores: split into two half-width truncating stores,
//    honoring endianness.
//
// Returns the new chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the value (round up).
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts.  Which half goes at the base address depends on
  // endianness.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
10326 
10327 SDValue
10328 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10329                                        const SDLoc &DL, EVT DataVT,
10330                                        SelectionDAG &DAG,
10331                                        bool IsCompressedMemory) const {
10332   SDValue Increment;
10333   EVT AddrVT = Addr.getValueType();
10334   EVT MaskVT = Mask.getValueType();
10335   assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10336          "Incompatible types of Data and Mask");
10337   if (IsCompressedMemory) {
10338     if (DataVT.isScalableVector())
10339       report_fatal_error(
10340           "Cannot currently handle compressed memory with scalable vectors");
10341     // Incrementing the pointer according to number of '1's in the mask.
10342     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10343     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10344     if (MaskIntVT.getSizeInBits() < 32) {
10345       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10346       MaskIntVT = MVT::i32;
10347     }
10348 
10349     // Count '1's with POPCNT.
10350     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10351     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10352     // Scale is an element size in bytes.
10353     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10354                                     AddrVT);
10355     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10356   } else if (DataVT.isScalableVector()) {
10357     Increment = DAG.getVScale(DL, AddrVT,
10358                               APInt(AddrVT.getFixedSizeInBits(),
10359                                     DataVT.getStoreSize().getKnownMinValue()));
10360   } else
10361     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10362 
10363   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10364 }
10365 
10366 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10367                                        EVT VecVT, const SDLoc &dl,
10368                                        ElementCount SubEC) {
10369   assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10370          "Cannot index a scalable vector within a fixed-width vector");
10371 
10372   unsigned NElts = VecVT.getVectorMinNumElements();
10373   unsigned NumSubElts = SubEC.getKnownMinValue();
10374   EVT IdxVT = Idx.getValueType();
10375 
10376   if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10377     // If this is a constant index and we know the value plus the number of the
10378     // elements in the subvector minus one is less than the minimum number of
10379     // elements then it's safe to return Idx.
10380     if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10381       if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10382         return Idx;
10383     SDValue VS =
10384         DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10385     unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10386     SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10387                               DAG.getConstant(NumSubElts, dl, IdxVT));
10388     return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10389   }
10390   if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10391     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10392     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10393                        DAG.getConstant(Imm, dl, IdxVT));
10394   }
10395   unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10396   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10397                      DAG.getConstant(MaxIndex, dl, IdxVT));
10398 }
10399 
10400 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10401                                                 SDValue VecPtr, EVT VecVT,
10402                                                 SDValue Index) const {
10403   return getVectorSubVecPointer(
10404       DAG, VecPtr, VecVT,
10405       EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10406       Index);
10407 }
10408 
10409 SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10410                                                SDValue VecPtr, EVT VecVT,
10411                                                EVT SubVecVT,
10412                                                SDValue Index) const {
10413   SDLoc dl(Index);
10414   // Make sure the index type is big enough to compute in.
10415   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10416 
10417   EVT EltVT = VecVT.getVectorElementType();
10418 
10419   // Calculate the element offset and add it to the pointer.
10420   unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10421   assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10422          "Converting bits to bytes lost precision");
10423   assert(SubVecVT.getVectorElementType() == EltVT &&
10424          "Sub-vector must be a vector with matching element type");
10425   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10426                                   SubVecVT.getVectorElementCount());
10427 
10428   EVT IdxVT = Index.getValueType();
10429   if (SubVecVT.isScalableVector())
10430     Index =
10431         DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10432                     DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10433 
10434   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10435                       DAG.getConstant(EltSize, dl, IdxVT));
10436   return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10437 }
10438 
10439 //===----------------------------------------------------------------------===//
10440 // Implementation of Emulated TLS Model
10441 //===----------------------------------------------------------------------===//
10442 
10443 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10444                                                 SelectionDAG &DAG) const {
10445   // Access to address of TLS varialbe xyz is lowered to a function call:
10446   //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10447   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10448   PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10449   SDLoc dl(GA);
10450 
10451   ArgListTy Args;
10452   ArgListEntry Entry;
10453   const GlobalValue *GV =
10454       cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
10455   SmallString<32> NameString("__emutls_v.");
10456   NameString += GV->getName();
10457   StringRef EmuTlsVarName(NameString);
10458   const GlobalVariable *EmuTlsVar =
10459       GV->getParent()->getNamedGlobal(EmuTlsVarName);
10460   assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10461   Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
10462   Entry.Ty = VoidPtrType;
10463   Args.push_back(Entry);
10464 
10465   SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10466 
10467   TargetLowering::CallLoweringInfo CLI(DAG);
10468   CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10469   CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10470   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10471 
10472   // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10473   // At last for X86 targets, maybe good for other targets too?
10474   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10475   MFI.setAdjustsStack(true); // Is this only for X86 target?
10476   MFI.setHasCalls(true);
10477 
10478   assert((GA->getOffset() == 0) &&
10479          "Emulated TLS must have zero offset in GlobalAddressSDNode");
10480   return CallResult.first;
10481 }
10482 
10483 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10484                                                 SelectionDAG &DAG) const {
10485   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10486   if (!isCtlzFast())
10487     return SDValue();
10488   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10489   SDLoc dl(Op);
10490   if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10491     EVT VT = Op.getOperand(0).getValueType();
10492     SDValue Zext = Op.getOperand(0);
10493     if (VT.bitsLT(MVT::i32)) {
10494       VT = MVT::i32;
10495       Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10496     }
10497     unsigned Log2b = Log2_32(VT.getSizeInBits());
10498     SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10499     SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10500                               DAG.getConstant(Log2b, dl, MVT::i32));
10501     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10502   }
10503   return SDValue();
10504 }
10505 
10506 SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10507   SDValue Op0 = Node->getOperand(0);
10508   SDValue Op1 = Node->getOperand(1);
10509   EVT VT = Op0.getValueType();
10510   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10511   unsigned Opcode = Node->getOpcode();
10512   SDLoc DL(Node);
10513 
10514   // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10515   if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10516       getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10517     Op0 = DAG.getFreeze(Op0);
10518     SDValue Zero = DAG.getConstant(0, DL, VT);
10519     return DAG.getNode(ISD::SUB, DL, VT, Op0,
10520                        DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10521   }
10522 
10523   // umin(x,y) -> sub(x,usubsat(x,y))
10524   // TODO: Missing freeze(Op0)?
10525   if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10526       isOperationLegal(ISD::USUBSAT, VT)) {
10527     return DAG.getNode(ISD::SUB, DL, VT, Op0,
10528                        DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10529   }
10530 
10531   // umax(x,y) -> add(x,usubsat(y,x))
10532   // TODO: Missing freeze(Op0)?
10533   if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10534       isOperationLegal(ISD::USUBSAT, VT)) {
10535     return DAG.getNode(ISD::ADD, DL, VT, Op0,
10536                        DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10537   }
10538 
10539   // FIXME: Should really try to split the vector in case it's legal on a
10540   // subvector.
10541   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10542     return DAG.UnrollVectorOp(Node);
10543 
10544   // Attempt to find an existing SETCC node that we can reuse.
10545   // TODO: Do we need a generic doesSETCCNodeExist?
10546   // TODO: Missing freeze(Op0)/freeze(Op1)?
10547   auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10548                          ISD::CondCode PrefCommuteCC,
10549                          ISD::CondCode AltCommuteCC) {
10550     SDVTList BoolVTList = DAG.getVTList(BoolVT);
10551     for (ISD::CondCode CC : {PrefCC, AltCC}) {
10552       if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10553                             {Op0, Op1, DAG.getCondCode(CC)})) {
10554         SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10555         return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10556       }
10557     }
10558     for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10559       if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10560                             {Op0, Op1, DAG.getCondCode(CC)})) {
10561         SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10562         return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10563       }
10564     }
10565     SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10566     return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10567   };
10568 
10569   // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10570   //                      -> Y = (A < B) ? B : A
10571   //                      -> Y = (A >= B) ? A : B
10572   //                      -> Y = (A <= B) ? B : A
10573   switch (Opcode) {
10574   case ISD::SMAX:
10575     return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10576   case ISD::SMIN:
10577     return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10578   case ISD::UMAX:
10579     return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10580   case ISD::UMIN:
10581     return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10582   }
10583 
10584   llvm_unreachable("How did we get here?");
10585 }
10586 
// Expand [US]ADDSAT/[US]SUBSAT. Prefers cheap min/max-based identities when
// the target has the required ops legal; otherwise computes the wrapping
// result with an overflow flag and selects the saturation constant on
// overflow.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to its overflow-reporting counterpart; its
  // second result tells us when to substitute the saturation constant.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // With sign-extended booleans the overflow flag is already an all-ones
      // mask, so the saturation value can be OR'ed in directly:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Same mask trick as UADDSAT above, but clamping to zero:
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      // Only SIGNED_MAX saturation is possible.
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      // Only SIGNED_MIN saturation is possible.
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // Neither sign is known. On overflow the sign bit of the wrapped result is
  // the inverse of the true sign, so arithmetic-shifting it across the value
  // and XOR'ing with SIGNED_MIN yields SIGNED_MAX or SIGNED_MIN as needed.
  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10702 
10703 SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10704   unsigned Opcode = Node->getOpcode();
10705   SDValue LHS = Node->getOperand(0);
10706   SDValue RHS = Node->getOperand(1);
10707   EVT VT = LHS.getValueType();
10708   EVT ResVT = Node->getValueType(0);
10709   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10710   SDLoc dl(Node);
10711 
10712   auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10713   auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10714   SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10715   SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10716 
10717   // We can't perform arithmetic on i1 values. Extending them would
10718   // probably result in worse codegen, so let's just use two selects instead.
10719   // Some targets are also just better off using selects rather than subtraction
10720   // because one of the conditions can be merged with one of the selects.
10721   // And finally, if we don't know the contents of high bits of a boolean value
10722   // we can't perform any arithmetic either.
10723   if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10724       getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10725     SDValue SelectZeroOrOne =
10726         DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10727                       DAG.getConstant(0, dl, ResVT));
10728     return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10729                          SelectZeroOrOne);
10730   }
10731 
10732   if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10733     std::swap(IsGT, IsLT);
10734   return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10735                             ResVT);
10736 }
10737 
10738 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10739   unsigned Opcode = Node->getOpcode();
10740   bool IsSigned = Opcode == ISD::SSHLSAT;
10741   SDValue LHS = Node->getOperand(0);
10742   SDValue RHS = Node->getOperand(1);
10743   EVT VT = LHS.getValueType();
10744   SDLoc dl(Node);
10745 
10746   assert((Node->getOpcode() == ISD::SSHLSAT ||
10747           Node->getOpcode() == ISD::USHLSAT) &&
10748           "Expected a SHLSAT opcode");
10749   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10750   assert(VT.isInteger() && "Expected operands to be integers");
10751 
10752   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10753     return DAG.UnrollVectorOp(Node);
10754 
10755   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10756 
10757   unsigned BW = VT.getScalarSizeInBits();
10758   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10759   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10760   SDValue Orig =
10761       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10762 
10763   SDValue SatVal;
10764   if (IsSigned) {
10765     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10766     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10767     SDValue Cond =
10768         DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10769     SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10770   } else {
10771     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10772   }
10773   SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10774   return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10775 }
10776 
// Produce the Lo/Hi halves of the WideVT product of LL:LH and RL:RH (each
// operand given as a pair of half-width parts). Uses a MUL libcall on WideVT
// when one exists; otherwise emits a schoolbook (Knuth Algorithm M)
// expansion on the half-width type.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, EVT WideVT,
                                        const SDValue LL, const SDValue LH,
                                        const SDValue RL, const SDValue RH,
                                        SDValue &Lo, SDValue &Hi) const {
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  // Also, we can fall back to a division in some cases, but that's a big
  // performance hit in the general case.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    // We'll expand the multiplication by brute force because we have no other
    // options. This is a trivially-generalized version of the code from
    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
    // 4.3.1).
    EVT VT = LL.getValueType();
    unsigned Bits = VT.getSizeInBits();
    unsigned HalfBits = Bits >> 1;
    // Split each low word into HalfBits-wide digits and cross-multiply the
    // digits, propagating carries, to form the full 2*Bits product of LL*RL.
    SDValue Mask =
        DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
    SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
    SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);

    // T: product of the two low digits.
    SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
    SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);

    SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
    SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
    SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
    SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);

    // U: LL's high digit times RL's low digit, plus the carry out of T.
    SDValue U = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
    SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
    SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);

    // V: LL's low digit times RL's high digit, plus the low digit of U.
    SDValue V = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
    SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);

    // W: high word of LL*RL (high-digit product plus both carries).
    SDValue W =
        DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
                    DAG.getNode(ISD::ADD, dl, VT, UH, VH));
    Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
                     DAG.getNode(ISD::SHL, dl, VT, V, Shift));

    // Hi accumulates W with the (truncating) cross products involving the
    // high words; bits above 2*Bits of the full product are discarded.
    Hi = DAG.getNode(ISD::ADD, dl, VT, W,
                     DAG.getNode(ISD::ADD, dl, VT,
                                 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
                                 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
  } else {
    // Attempt a libcall.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(Signed);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = {LL, LH, RL, RH};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = {LH, LL, RH, RL};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      Lo = Ret.getOperand(0);
      Hi = Ret.getOperand(1);
    } else {
      Lo = Ret.getOperand(1);
      Hi = Ret.getOperand(0);
    }
  }
}
10865 
10866 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10867                                         bool Signed, const SDValue LHS,
10868                                         const SDValue RHS, SDValue &Lo,
10869                                         SDValue &Hi) const {
10870   EVT VT = LHS.getValueType();
10871   assert(RHS.getValueType() == VT && "Mismatching operand types");
10872 
10873   SDValue HiLHS;
10874   SDValue HiRHS;
10875   if (Signed) {
10876     // The high part is obtained by SRA'ing all but one of the bits of low
10877     // part.
10878     unsigned LoSize = VT.getFixedSizeInBits();
10879     HiLHS = DAG.getNode(
10880         ISD::SRA, dl, VT, LHS,
10881         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10882     HiRHS = DAG.getNode(
10883         ISD::SRA, dl, VT, RHS,
10884         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10885   } else {
10886     HiLHS = DAG.getConstant(0, dl, VT);
10887     HiRHS = DAG.getConstant(0, dl, VT);
10888   }
10889   EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10890   forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10891 }
10892 
// Expand [US]MULFIX[SAT]: multiply two Scale-bit fixed point values by
// forming the 2*VTSize-bit product and shifting it right by Scale; for the
// SAT variants, clamp to the type's min/max when the discarded high bits
// indicate overflow.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // smul.fix.sat(a, b, 0) -> smulo, selecting the saturation constant
      // when the overflow flag is set.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // umul.fix.sat(a, b, 0) -> umulo, selecting UNSIGNED_MAX on overflow.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    // One node producing both halves.
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    // Separate MUL for the low half, MULH for the high half.
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    // No way to form the double-width product for this vector type here.
    return SDValue();
  } else {
    // Scalar last resort: brute-force/libcall expansion of the wide multiply.
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // All the overflow information is in Hi vs the sign of Lo.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
11049 
// Expand [US]DIVFIX[SAT]: divide two Scale-bit fixed point values. Succeeds
// only when the operands have enough headroom to pre-shift so the division
// can be performed directly in VT; otherwise returns SDValue() and the
// caller must widen the type. Signed quotients are corrected to round
// towards negative infinity (fixed point semantics), not towards zero.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer shifting the LHS up; any remaining scale comes out of the RHS.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    // The quotient is negative exactly when the operand signs differ.
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
11135 
11136 void TargetLowering::expandUADDSUBO(
11137     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11138   SDLoc dl(Node);
11139   SDValue LHS = Node->getOperand(0);
11140   SDValue RHS = Node->getOperand(1);
11141   bool IsAdd = Node->getOpcode() == ISD::UADDO;
11142 
11143   // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11144   unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11145   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11146     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11147     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11148                                     { LHS, RHS, CarryIn });
11149     Result = SDValue(NodeCarry.getNode(), 0);
11150     Overflow = SDValue(NodeCarry.getNode(), 1);
11151     return;
11152   }
11153 
11154   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11155                             LHS.getValueType(), LHS, RHS);
11156 
11157   EVT ResultType = Node->getValueType(1);
11158   EVT SetCCType = getSetCCResultType(
11159       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11160   SDValue SetCC;
11161   if (IsAdd && isOneConstant(RHS)) {
11162     // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11163     // the live range of X. We assume comparing with 0 is cheap.
11164     // The general case (X + C) < C is not necessarily beneficial. Although we
11165     // reduce the live range of X, we may introduce the materialization of
11166     // constant C.
11167     SetCC =
11168         DAG.getSetCC(dl, SetCCType, Result,
11169                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11170   } else if (IsAdd && isAllOnesConstant(RHS)) {
11171     // Special case: uaddo X, -1 overflows if X != 0.
11172     SetCC =
11173         DAG.getSetCC(dl, SetCCType, LHS,
11174                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11175   } else {
11176     ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11177     SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11178   }
11179   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11180 }
11181 
11182 void TargetLowering::expandSADDSUBO(
11183     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11184   SDLoc dl(Node);
11185   SDValue LHS = Node->getOperand(0);
11186   SDValue RHS = Node->getOperand(1);
11187   bool IsAdd = Node->getOpcode() == ISD::SADDO;
11188 
11189   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11190                             LHS.getValueType(), LHS, RHS);
11191 
11192   EVT ResultType = Node->getValueType(1);
11193   EVT OType = getSetCCResultType(
11194       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11195 
11196   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11197   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11198   if (isOperationLegal(OpcSat, LHS.getValueType())) {
11199     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11200     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11201     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11202     return;
11203   }
11204 
11205   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11206 
11207   // For an addition, the result should be less than one of the operands (LHS)
11208   // if and only if the other operand (RHS) is negative, otherwise there will
11209   // be overflow.
11210   // For a subtraction, the result should be less than one of the operands
11211   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11212   // otherwise there will be overflow.
11213   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11214   SDValue ConditionRHS =
11215       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11216 
11217   Overflow = DAG.getBoolExtOrTrunc(
11218       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11219       ResultType, ResultType);
11220 }
11221 
/// Expand [SU]MULO into an ordinary multiply plus an explicit overflow check.
/// On success, sets \p Result to the low half of the product and \p Overflow
/// to a boolean of the node's second result type. Strategies, cheapest first:
///   1. power-of-two constant RHS -> shift and compare,
///   2. MULH[US] / [SU]MUL_LOHI to obtain the high half of the product,
///   3. widen to a legal 2x-width type and use a plain MUL,
///   4. scalar-only fallback via forceExpandWideMUL.
/// Returns false only for vector types where none of the above is available.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back (arithmetically when signed, logically
      // otherwise) does not reproduce the original LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Type with twice the scalar width, used when the full double-width product
  // must be computed explicitly.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Per-signedness opcodes: { high-half mul, combined lo/hi mul, extension }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    // Separate nodes for the low (MUL) and high (MULH[US]) halves.
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    // A single node producing both halves of the product.
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Extend both operands, multiply in the wide type, then split the wide
    // product back into bottom and top halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // Scalar fallback: synthesize the wide multiply from scratch.
    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half differs from the sign-extension of the
    // bottom half, i.e. TopHalf != BottomHalf >>s (width - 1).
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11301 
11302 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11303   SDLoc dl(Node);
11304   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11305   SDValue Op = Node->getOperand(0);
11306   EVT VT = Op.getValueType();
11307 
11308   if (VT.isScalableVector())
11309     report_fatal_error(
11310         "Expanding reductions for scalable vectors is undefined.");
11311 
11312   // Try to use a shuffle reduction for power of two vectors.
11313   if (VT.isPow2VectorType()) {
11314     while (VT.getVectorNumElements() > 1) {
11315       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11316       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11317         break;
11318 
11319       SDValue Lo, Hi;
11320       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11321       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11322       VT = HalfVT;
11323     }
11324   }
11325 
11326   EVT EltVT = VT.getVectorElementType();
11327   unsigned NumElts = VT.getVectorNumElements();
11328 
11329   SmallVector<SDValue, 8> Ops;
11330   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11331 
11332   SDValue Res = Ops[0];
11333   for (unsigned i = 1; i < NumElts; i++)
11334     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11335 
11336   // Result type may be wider than element type.
11337   if (EltVT != Node->getValueType(0))
11338     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11339   return Res;
11340 }
11341 
11342 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11343   SDLoc dl(Node);
11344   SDValue AccOp = Node->getOperand(0);
11345   SDValue VecOp = Node->getOperand(1);
11346   SDNodeFlags Flags = Node->getFlags();
11347 
11348   EVT VT = VecOp.getValueType();
11349   EVT EltVT = VT.getVectorElementType();
11350 
11351   if (VT.isScalableVector())
11352     report_fatal_error(
11353         "Expanding reductions for scalable vectors is undefined.");
11354 
11355   unsigned NumElts = VT.getVectorNumElements();
11356 
11357   SmallVector<SDValue, 8> Ops;
11358   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11359 
11360   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11361 
11362   SDValue Res = AccOp;
11363   for (unsigned i = 0; i < NumElts; i++)
11364     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11365 
11366   return Res;
11367 }
11368 
11369 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11370                                SelectionDAG &DAG) const {
11371   EVT VT = Node->getValueType(0);
11372   SDLoc dl(Node);
11373   bool isSigned = Node->getOpcode() == ISD::SREM;
11374   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11375   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11376   SDValue Dividend = Node->getOperand(0);
11377   SDValue Divisor = Node->getOperand(1);
11378   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11379     SDVTList VTs = DAG.getVTList(VT, VT);
11380     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11381     return true;
11382   }
11383   if (isOperationLegalOrCustom(DivOpc, VT)) {
11384     // X % Y -> X-X/Y*Y
11385     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11386     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11387     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11388     return true;
11389   }
11390   return false;
11391 }
11392 
/// Expand FP_TO_[SU]INT_SAT: convert \p Src to an integer, clamping the result
/// into the range of a SatWidth-bit (signed or unsigned) integer and mapping
/// NaN to zero. Uses FMINNUM/FMAXNUM when the bounds are exactly representable
/// and those operations are legal; otherwise falls back to compares + selects.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Convert the integer bounds to FP, rounding toward zero, and note whether
  // the conversions were exact (inexact bounds force the compare+select path).
  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
11502 
/// Narrow \p Op from OperandVT to \p ResultVT using round-to-odd: if the
/// narrowing was inexact and landed on an even significand, nudge the result
/// to the adjacent odd value. Rounding to odd first makes a subsequent
/// round-to-nearest step (e.g. f32 -> bf16) produce the correctly rounded
/// result, avoiding double-rounding errors. The sign is handled separately
/// so the computation can run on absolute values.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  // Same scalar type: nothing to round.
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT WideIntVT = OperandVT.changeTypeToInteger();
  SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
  // Extract the sign bit; it is reattached at the end.
  SDValue SignBit =
      DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
                  DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
  // Compute |Op|, via FABS when available, otherwise by masking off the
  // sign bit in the integer domain.
  SDValue AbsWide;
  if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
    AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  } else {
    SDValue ClearedSign = DAG.getNode(
        ISD::AND, dl, WideIntVT, OpAsInt,
        DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
    AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
  }
  // Narrow |Op| and re-widen it so it can be compared against the original.
  SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
  SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       AbsWide.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true when the operands are equal OR either is NaN.
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  // Move the saved sign bit into the narrow type's sign position and
  // merge it back in, then bitcast to the FP result type.
  int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
  SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
  SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
  SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
  Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
11570 
11571 SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
11572   assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11573   SDValue Op = Node->getOperand(0);
11574   EVT VT = Node->getValueType(0);
11575   SDLoc dl(Node);
11576   if (VT.getScalarType() == MVT::bf16) {
11577     if (Node->getConstantOperandVal(1) == 1) {
11578       return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11579     }
11580     EVT OperandVT = Op.getValueType();
11581     SDValue IsNaN = DAG.getSetCC(
11582         dl,
11583         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11584         Op, Op, ISD::SETUO);
11585 
11586     // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11587     // can induce double-rounding which may alter the results. We can
11588     // correct for this using a trick explained in: Boldo, Sylvie, and
11589     // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11590     // World Congress. 2005.
11591     EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11592     EVT I32 = F32.changeTypeToInteger();
11593     Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11594     Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11595 
11596     // Conversions should set NaN's quiet bit. This also prevents NaNs from
11597     // turning into infinities.
11598     SDValue NaN =
11599         DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11600 
11601     // Factor in the contribution of the low 16 bits.
11602     SDValue One = DAG.getConstant(1, dl, I32);
11603     SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11604                               DAG.getShiftAmountConstant(16, I32, dl));
11605     Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11606     SDValue RoundingBias =
11607         DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11608     SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11609 
11610     // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11611     // 0x80000000.
11612     Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11613 
11614     // Now that we have rounded, shift the bits into position.
11615     Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11616                      DAG.getShiftAmountConstant(16, I32, dl));
11617     Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11618     EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11619     Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11620     return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11621   }
11622   return SDValue();
11623 }
11624 
/// Expand VECTOR_SPLICE for scalable vectors by storing CONCAT(V1, V2) to a
/// stack temporary and reloading a full vector starting at the splice point:
/// at element Imm of V1 for Imm >= 0, or -Imm elements before the end of V1
/// (clamped so the load stays within the two stored vectors) for Imm < 0.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot big enough for both vectors back to back.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // sizeof(V1) is scalable, so the offset is expressed via VSCALE.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  // Only when TrailingElts can exceed V1's (minimum) element count does the
  // byte offset need a runtime UMIN clamp against the scalable vector size.
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
11698 
11699 SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11700                                               SelectionDAG &DAG) const {
11701   SDLoc DL(Node);
11702   SDValue Vec = Node->getOperand(0);
11703   SDValue Mask = Node->getOperand(1);
11704   SDValue Passthru = Node->getOperand(2);
11705 
11706   EVT VecVT = Vec.getValueType();
11707   EVT ScalarVT = VecVT.getScalarType();
11708   EVT MaskVT = Mask.getValueType();
11709   EVT MaskScalarVT = MaskVT.getScalarType();
11710 
11711   // Needs to be handled by targets that have scalable vector types.
11712   if (VecVT.isScalableVector())
11713     report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11714 
11715   SDValue StackPtr = DAG.CreateStackTemporary(
11716       VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11717   int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11718   MachinePointerInfo PtrInfo =
11719       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11720 
11721   MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11722   SDValue Chain = DAG.getEntryNode();
11723   SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11724 
11725   bool HasPassthru = !Passthru.isUndef();
11726 
11727   // If we have a passthru vector, store it on the stack, overwrite the matching
11728   // positions and then re-write the last element that was potentially
11729   // overwritten even though mask[i] = false.
11730   if (HasPassthru)
11731     Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11732 
11733   SDValue LastWriteVal;
11734   APInt PassthruSplatVal;
11735   bool IsSplatPassthru =
11736       ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11737 
11738   if (IsSplatPassthru) {
11739     // As we do not know which position we wrote to last, we cannot simply
11740     // access that index from the passthru vector. So we first check if passthru
11741     // is a splat vector, to use any element ...
11742     LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11743   } else if (HasPassthru) {
11744     // ... if it is not a splat vector, we need to get the passthru value at
11745     // position = popcount(mask) and re-load it from the stack before it is
11746     // overwritten in the loop below.
11747     EVT PopcountVT = ScalarVT.changeTypeToInteger();
11748     SDValue Popcount = DAG.getNode(
11749         ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11750     Popcount =
11751         DAG.getNode(ISD::ZERO_EXTEND, DL,
11752                     MaskVT.changeVectorElementType(PopcountVT), Popcount);
11753     Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11754     SDValue LastElmtPtr =
11755         getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11756     LastWriteVal = DAG.getLoad(
11757         ScalarVT, DL, Chain, LastElmtPtr,
11758         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11759     Chain = LastWriteVal.getValue(1);
11760   }
11761 
11762   unsigned NumElms = VecVT.getVectorNumElements();
11763   for (unsigned I = 0; I < NumElms; I++) {
11764     SDValue Idx = DAG.getVectorIdxConstant(I, DL);
11765 
11766     SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11767     SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11768     Chain = DAG.getStore(
11769         Chain, DL, ValI, OutPtr,
11770         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11771 
11772     // Get the mask value and add it to the current output position. This
11773     // either increments by 1 if MaskI is true or adds 0 otherwise.
11774     // Freeze in case we have poison/undef mask entries.
11775     SDValue MaskI = DAG.getFreeze(
11776         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11777     MaskI = DAG.getFreeze(MaskI);
11778     MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11779     MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11780     OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11781 
11782     if (HasPassthru && I == NumElms - 1) {
11783       SDValue EndOfVector =
11784           DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11785       SDValue AllLanesSelected =
11786           DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11787       OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11788       OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11789 
11790       // Re-write the last ValI if all lanes were selected. Otherwise,
11791       // overwrite the last write it with the passthru value.
11792       LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11793                                    LastWriteVal, SDNodeFlags::Unpredictable);
11794       Chain = DAG.getStore(
11795           Chain, DL, LastWriteVal, OutPtr,
11796           MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11797     }
11798   }
11799 
11800   return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11801 }
11802 
11803 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
11804                                            SDValue &LHS, SDValue &RHS,
11805                                            SDValue &CC, SDValue Mask,
11806                                            SDValue EVL, bool &NeedInvert,
11807                                            const SDLoc &dl, SDValue &Chain,
11808                                            bool IsSignaling) const {
11809   MVT OpVT = LHS.getSimpleValueType();
11810   ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11811   NeedInvert = false;
11812   assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11813   bool IsNonVP = !EVL;
11814   switch (getCondCodeAction(CCCode, OpVT)) {
11815   default:
11816     llvm_unreachable("Unknown condition code action!");
11817   case TargetLowering::Legal:
11818     // Nothing to do.
11819     break;
11820   case TargetLowering::Expand: {
11821     ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
11822     if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11823       std::swap(LHS, RHS);
11824       CC = DAG.getCondCode(InvCC);
11825       return true;
11826     }
11827     // Swapping operands didn't work. Try inverting the condition.
11828     bool NeedSwap = false;
11829     InvCC = getSetCCInverse(CCCode, OpVT);
11830     if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
11831       // If inverting the condition is not enough, try swapping operands
11832       // on top of it.
11833       InvCC = ISD::getSetCCSwappedOperands(InvCC);
11834       NeedSwap = true;
11835     }
11836     if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11837       CC = DAG.getCondCode(InvCC);
11838       NeedInvert = true;
11839       if (NeedSwap)
11840         std::swap(LHS, RHS);
11841       return true;
11842     }
11843 
11844     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
11845     unsigned Opc = 0;
11846     switch (CCCode) {
11847     default:
11848       llvm_unreachable("Don't know how to expand this condition!");
11849     case ISD::SETUO:
11850       if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
11851         CC1 = ISD::SETUNE;
11852         CC2 = ISD::SETUNE;
11853         Opc = ISD::OR;
11854         break;
11855       }
11856       assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11857              "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11858       NeedInvert = true;
11859       [[fallthrough]];
11860     case ISD::SETO:
11861       assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11862              "If SETO is expanded, SETOEQ must be legal!");
11863       CC1 = ISD::SETOEQ;
11864       CC2 = ISD::SETOEQ;
11865       Opc = ISD::AND;
11866       break;
11867     case ISD::SETONE:
11868     case ISD::SETUEQ:
11869       // If the SETUO or SETO CC isn't legal, we might be able to use
11870       // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11871       // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11872       // the operands.
11873       CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11874       if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
11875                                           isCondCodeLegal(ISD::SETOLT, OpVT))) {
11876         CC1 = ISD::SETOGT;
11877         CC2 = ISD::SETOLT;
11878         Opc = ISD::OR;
11879         NeedInvert = ((unsigned)CCCode & 0x8U);
11880         break;
11881       }
11882       [[fallthrough]];
11883     case ISD::SETOEQ:
11884     case ISD::SETOGT:
11885     case ISD::SETOGE:
11886     case ISD::SETOLT:
11887     case ISD::SETOLE:
11888     case ISD::SETUNE:
11889     case ISD::SETUGT:
11890     case ISD::SETUGE:
11891     case ISD::SETULT:
11892     case ISD::SETULE:
11893       // If we are floating point, assign and break, otherwise fall through.
11894       if (!OpVT.isInteger()) {
11895         // We can use the 4th bit to tell if we are the unordered
11896         // or ordered version of the opcode.
11897         CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11898         Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
11899         CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11900         break;
11901       }
11902       // Fallthrough if we are unsigned integer.
11903       [[fallthrough]];
11904     case ISD::SETLE:
11905     case ISD::SETGT:
11906     case ISD::SETGE:
11907     case ISD::SETLT:
11908     case ISD::SETNE:
11909     case ISD::SETEQ:
11910       // If all combinations of inverting the condition and swapping operands
11911       // didn't work then we have no means to expand the condition.
11912       llvm_unreachable("Don't know how to expand this condition!");
11913     }
11914 
11915     SDValue SetCC1, SetCC2;
11916     if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11917       // If we aren't the ordered or unorder operation,
11918       // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11919       if (IsNonVP) {
11920         SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11921         SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11922       } else {
11923         SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11924         SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11925       }
11926     } else {
11927       // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11928       if (IsNonVP) {
11929         SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11930         SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11931       } else {
11932         SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11933         SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11934       }
11935     }
11936     if (Chain)
11937       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11938                           SetCC2.getValue(1));
11939     if (IsNonVP)
11940       LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11941     else {
11942       // Transform the binary opcode to the VP equivalent.
11943       assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11944       Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11945       LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11946     }
11947     RHS = SDValue();
11948     CC = SDValue();
11949     return true;
11950   }
11951   }
11952   return false;
11953 }
11954 
11955 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
11956                                                       SelectionDAG &DAG) const {
11957   EVT VT = Node->getValueType(0);
11958   // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
11959   // split into two equal parts.
11960   if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
11961     return SDValue();
11962 
11963   // Restrict expansion to cases where both parts can be concatenated.
11964   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
11965   if (LoVT != HiVT || !isTypeLegal(LoVT))
11966     return SDValue();
11967 
11968   SDLoc DL(Node);
11969   unsigned Opcode = Node->getOpcode();
11970 
11971   // Don't expand if the result is likely to be unrolled anyway.
11972   if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
11973     return SDValue();
11974 
11975   SmallVector<SDValue, 4> LoOps, HiOps;
11976   for (const SDValue &V : Node->op_values()) {
11977     auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
11978     LoOps.push_back(Lo);
11979     HiOps.push_back(Hi);
11980   }
11981 
11982   SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
11983   SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
11984   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
11985 }
11986