xref: /llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (revision 067e8b8dc54b2558548c248ae851a0e01cb05878)
1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Analysis/VectorUtils.h"
16 #include "llvm/CodeGen/CallingConvLower.h"
17 #include "llvm/CodeGen/CodeGenCommonISel.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineJumpTableInfo.h"
21 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/TargetRegisterInfo.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/Support/DivisionByConstantInfo.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include <cctype>
37 using namespace llvm;
38 
39 /// NOTE: The TargetMachine owns TLOF.
// Construction simply forwards to TargetLoweringBase; all shared lowering
// state lives in the base class.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42 
// Default implementation: targets override this to supply readable names for
// their target-specific DAG nodes. Returning null means "no name known".
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46 
bool TargetLowering::isPositionIndependent() const {
  // Forward the relocation-model query to the owning TargetMachine.
  return getTargetMachine().isPositionIndependent();
}
50 
51 /// Check whether a given call node is in tail position within its function. If
52 /// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr :
       {Attribute::Alignment, Attribute::Dereferenceable,
        Attribute::DereferenceableOrNull, Attribute::NoAlias,
        Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
    CallerAttrs.removeAttribute(Attr);

  // Any attribute left after the removals above makes the tail call unsafe.
  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // NOTE(review): this check appears unreachable — ZExt/SExt are not in the
  // removal list above, so a remaining ZExt/SExt already triggered the
  // hasAttributes() bail-out. Kept as defensive documentation; confirm before
  // removing.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node; if so, Chain is set to
  // the input chain of that return.
  return isUsedByReturnOnly(Node, Chain);
}
82 
83 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
84     const uint32_t *CallerPreservedMask,
85     const SmallVectorImpl<CCValAssign> &ArgLocs,
86     const SmallVectorImpl<SDValue> &OutVals) const {
87   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
88     const CCValAssign &ArgLoc = ArgLocs[I];
89     if (!ArgLoc.isRegLoc())
90       continue;
91     MCRegister Reg = ArgLoc.getLocReg();
92     // Only look at callee saved registers.
93     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94       continue;
95     // Check that we pass the value used for the caller.
96     // (We look for a CopyFromReg reading a virtual register that is used
97     //  for the function live-in value of register Reg)
98     SDValue Value = OutVals[I];
99     if (Value->getOpcode() == ISD::AssertZext)
100       Value = Value.getOperand(0);
101     if (Value->getOpcode() != ISD::CopyFromReg)
102       return false;
103     Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105       return false;
106   }
107   return true;
108 }
109 
110 /// Set CallLoweringInfo attribute flags based on a call instruction
111 /// and called function attributes.
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror each relevant parameter attribute of the call into the
  // corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // The indirect-passing ABI attributes are mutually exclusive.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // For byval, an explicit stack alignment takes precedence; otherwise fall
    // back to the parameter's own alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
143 
144 /// Generate a libcall taking the given operands as arguments and returning a
145 /// result of type RetVT.
146 std::pair<SDValue, SDValue>
147 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
148                             ArrayRef<SDValue> Ops,
149                             MakeLibCallOptions CallOptions,
150                             const SDLoc &dl,
151                             SDValue InChain) const {
152   if (!InChain)
153     InChain = DAG.getEntryNode();
154 
155   TargetLowering::ArgListTy Args;
156   Args.reserve(Ops.size());
157 
158   TargetLowering::ArgListEntry Entry;
159   for (unsigned i = 0; i < Ops.size(); ++i) {
160     SDValue NewOp = Ops[i];
161     Entry.Node = NewOp;
162     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
163     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
164                                                  CallOptions.IsSExt);
165     Entry.IsZExt = !Entry.IsSExt;
166 
167     if (CallOptions.IsSoften &&
168         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
169       Entry.IsSExt = Entry.IsZExt = false;
170     }
171     Args.push_back(Entry);
172   }
173 
174   if (LC == RTLIB::UNKNOWN_LIBCALL)
175     report_fatal_error("Unsupported library call operation!");
176   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
177                                          getPointerTy(DAG.getDataLayout()));
178 
179   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
180   TargetLowering::CallLoweringInfo CLI(DAG);
181   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
182   bool zeroExtend = !signExtend;
183 
184   if (CallOptions.IsSoften &&
185       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
186     signExtend = zeroExtend = false;
187   }
188 
189   CLI.setDebugLoc(dl)
190       .setChain(InChain)
191       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
192       .setNoReturn(CallOptions.DoesNotReturn)
193       .setDiscardResult(!CallOptions.IsReturnValueUsed)
194       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
195       .setSExtResult(signExtend)
196       .setZExtResult(zeroExtend);
197   return LowerCallTo(CLI);
198 }
199 
// Determine the sequence of value types to use when lowering a memory
// operation (memcpy/memset/memmove) of Op.size() bytes, appending them to
// MemOps. Returns false if more than Limit operations would be required or
// if the alignment constraints cannot be met.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // Reject a fixed-destination-align memcpy whose source is less aligned than
  // its destination, unless the caller imposed no op-count limit.
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Ask the target for its preferred type; MVT::Other means "no preference".
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      // Walk down the integer types until one fits the destination alignment
      // or misaligned accesses of that type are allowed.
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily emit the largest usable type until all bytes are covered.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        // Prefer an integer type of half the width (i64 or i32).
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the simple types until one is safe, stopping at
        // i8 which is always usable.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
289 
290 /// Soften the operands of a comparison. This code is shared among BR_CC,
291 /// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  // Convenience overload for callers that do not thread a chain through the
  // comparison libcall(s): pass a null chain and discard its update.
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}
301 
// Soften a floating-point comparison into one or two integer-returning
// comparison libcalls (RTLIB::O*/U* family). On return, NewLHS/NewRHS/CCCode
// describe the integer setcc to perform; NewRHS is null when the result is
// already a fully-formed boolean value. Chain, if non-null, is threaded
// through the libcall(s) and updated.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2, when set, requires a
  // second call whose result is combined with the first.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ: needs two libcalls combined below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  // Record the pre-soften operand types so extension decisions use them.
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  // The libcall result is compared against zero with the libcall's CC.
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-libcall case: evaluate both comparisons and combine them with
    // AND (inverted) or OR, joining the chains with a TokenFactor.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // Result is a complete boolean; signal that to the caller.
    NewRHS = SDValue();
  }
}
441 
442 /// Return the entry encoding for a jump table in the current function. The
443 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
444 unsigned TargetLowering::getJumpTableEncoding() const {
445   // In non-pic modes, just use the address of a block.
446   if (!isPositionIndependent())
447     return MachineJumpTableInfo::EK_BlockAddress;
448 
449   // In PIC mode, if the target supports a GPRel32 directive, use it.
450   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
451     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
452 
453   // Otherwise, use a label difference.
454   return MachineJumpTableInfo::EK_LabelDifference32;
455 }
456 
457 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
458                                                  SelectionDAG &DAG) const {
459   // If our PIC model is GP relative, use the global offset table as the base.
460   unsigned JTEncoding = getJumpTableEncoding();
461 
462   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
463       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
464     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
465 
466   return Table;
467 }
468 
469 /// This returns the relocation base for the given PIC jumptable, the same as
470 /// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
477 
478 SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
479                                                SDValue Addr, int JTI,
480                                                SelectionDAG &DAG) const {
481   SDValue Chain = Value;
482   // Jump table debug info is only needed if CodeView is enabled.
483   if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
484     Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
485   }
486   return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
487 }
488 
489 bool
490 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
491   const TargetMachine &TM = getTargetMachine();
492   const GlobalValue *GV = GA->getGlobal();
493 
494   // If the address is not even local to this DSO we will have to load it from
495   // a got and then add the offset.
496   if (!TM.shouldAssumeDSOLocal(GV))
497     return false;
498 
499   // If the code is position independent we will have to add a base register.
500   if (isPositionIndependent())
501     return false;
502 
503   // Otherwise we can do it.
504   return true;
505 }
506 
507 //===----------------------------------------------------------------------===//
508 //  Optimization Methods
509 //===----------------------------------------------------------------------===//
510 
511 /// If the specified instruction has a constant integer operand and there are
512 /// bits set in that constant that are not demanded, then clear those bits and
513 /// return true.
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    // TLO.New is only set when the target made a replacement; the SDNode*
    // converts to the bool result.
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only handle a non-opaque constant on the RHS.
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // Constant has bits outside the demanded mask: clear them and rebuild.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
560 
561 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
562                                             const APInt &DemandedBits,
563                                             TargetLoweringOpt &TLO) const {
564   EVT VT = Op.getValueType();
565   APInt DemandedElts = VT.isVector()
566                            ? APInt::getAllOnes(VT.getVectorNumElements())
567                            : APInt(1, 1);
568   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
569 }
570 
571 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
572 /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
573 /// but it could be generalized for targets with other types of implicit
574 /// widening casts.
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts: truncate both operands, perform the
      // operation in the narrow type, then widen the result back.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
619 
620 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
621                                           DAGCombinerInfo &DCI) const {
622   SelectionDAG &DAG = DCI.DAG;
623   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
624                         !DCI.isBeforeLegalizeOps());
625   KnownBits Known;
626 
627   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
628   if (Simplified) {
629     DCI.AddToWorklist(Op.getNode());
630     DCI.CommitTargetLoweringOpt(TLO);
631   }
632   return Simplified;
633 }
634 
635 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
636                                           const APInt &DemandedElts,
637                                           DAGCombinerInfo &DCI) const {
638   SelectionDAG &DAG = DCI.DAG;
639   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
640                         !DCI.isBeforeLegalizeOps());
641   KnownBits Known;
642 
643   bool Simplified =
644       SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
645   if (Simplified) {
646     DCI.AddToWorklist(Op.getNode());
647     DCI.CommitTargetLoweringOpt(TLO);
648   }
649   return Simplified;
650 }
651 
652 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
653                                           KnownBits &Known,
654                                           TargetLoweringOpt &TLO,
655                                           unsigned Depth,
656                                           bool AssumeSingleUse) const {
657   EVT VT = Op.getValueType();
658 
659   // Since the number of lanes in a scalable vector is unknown at compile time,
660   // we track one bit which is implicitly broadcast to all lanes.  This means
661   // that all lanes in a scalable vector are considered demanded.
662   APInt DemandedElts = VT.isFixedLengthVector()
663                            ? APInt::getAllOnes(VT.getVectorNumElements())
664                            : APInt(1, 1);
665   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
666                               AssumeSingleUse);
667 }
668 
669 // TODO: Under what circumstances can we create nodes? Constant folding?
670 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
671     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
672     SelectionDAG &DAG, unsigned Depth) const {
673   EVT VT = Op.getValueType();
674 
675   // Limit search depth.
676   if (Depth >= SelectionDAG::MaxRecursionDepth)
677     return SDValue();
678 
679   // Ignore UNDEFs.
680   if (Op.isUndef())
681     return SDValue();
682 
683   // Not demanding any bits/elts from Op.
684   if (DemandedBits == 0 || DemandedElts == 0)
685     return DAG.getUNDEF(VT);
686 
687   bool IsLE = DAG.getDataLayout().isLittleEndian();
688   unsigned NumElts = DemandedElts.getBitWidth();
689   unsigned BitWidth = DemandedBits.getBitWidth();
690   KnownBits LHSKnown, RHSKnown;
691   switch (Op.getOpcode()) {
692   case ISD::BITCAST: {
693     if (VT.isScalableVector())
694       return SDValue();
695 
696     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
697     EVT SrcVT = Src.getValueType();
698     EVT DstVT = Op.getValueType();
699     if (SrcVT == DstVT)
700       return Src;
701 
702     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
703     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
704     if (NumSrcEltBits == NumDstEltBits)
705       if (SDValue V = SimplifyMultipleUseDemandedBits(
706               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
707         return DAG.getBitcast(DstVT, V);
708 
709     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
710       unsigned Scale = NumDstEltBits / NumSrcEltBits;
711       unsigned NumSrcElts = SrcVT.getVectorNumElements();
712       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
713       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
714       for (unsigned i = 0; i != Scale; ++i) {
715         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
716         unsigned BitOffset = EltOffset * NumSrcEltBits;
717         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
718         if (!Sub.isZero()) {
719           DemandedSrcBits |= Sub;
720           for (unsigned j = 0; j != NumElts; ++j)
721             if (DemandedElts[j])
722               DemandedSrcElts.setBit((j * Scale) + i);
723         }
724       }
725 
726       if (SDValue V = SimplifyMultipleUseDemandedBits(
727               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
728         return DAG.getBitcast(DstVT, V);
729     }
730 
731     // TODO - bigendian once we have test coverage.
732     if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
733       unsigned Scale = NumSrcEltBits / NumDstEltBits;
734       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
735       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
736       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
737       for (unsigned i = 0; i != NumElts; ++i)
738         if (DemandedElts[i]) {
739           unsigned Offset = (i % Scale) * NumDstEltBits;
740           DemandedSrcBits.insertBits(DemandedBits, Offset);
741           DemandedSrcElts.setBit(i / Scale);
742         }
743 
744       if (SDValue V = SimplifyMultipleUseDemandedBits(
745               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
746         return DAG.getBitcast(DstVT, V);
747     }
748 
749     break;
750   }
751   case ISD::FREEZE: {
752     SDValue N0 = Op.getOperand(0);
753     if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
754                                              /*PoisonOnly=*/false))
755       return N0;
756     break;
757   }
758   case ISD::AND: {
759     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761 
762     // If all of the demanded bits are known 1 on one side, return the other.
763     // These bits cannot contribute to the result of the 'and' in this
764     // context.
765     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
766       return Op.getOperand(0);
767     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
768       return Op.getOperand(1);
769     break;
770   }
771   case ISD::OR: {
772     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774 
775     // If all of the demanded bits are known zero on one side, return the
776     // other.  These bits cannot contribute to the result of the 'or' in this
777     // context.
778     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
779       return Op.getOperand(0);
780     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
781       return Op.getOperand(1);
782     break;
783   }
784   case ISD::XOR: {
785     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
786     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
787 
788     // If all of the demanded bits are known zero on one side, return the
789     // other.
790     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
791       return Op.getOperand(0);
792     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
793       return Op.getOperand(1);
794     break;
795   }
796   case ISD::SHL: {
797     // If we are only demanding sign bits then we can use the shift source
798     // directly.
799     if (std::optional<uint64_t> MaxSA =
800             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
801       SDValue Op0 = Op.getOperand(0);
802       unsigned ShAmt = *MaxSA;
803       unsigned NumSignBits =
804           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
805       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
806       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
807         return Op0;
808     }
809     break;
810   }
811   case ISD::SETCC: {
812     SDValue Op0 = Op.getOperand(0);
813     SDValue Op1 = Op.getOperand(1);
814     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
815     // If (1) we only need the sign-bit, (2) the setcc operands are the same
816     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
817     // -1, we may be able to bypass the setcc.
818     if (DemandedBits.isSignMask() &&
819         Op0.getScalarValueSizeInBits() == BitWidth &&
820         getBooleanContents(Op0.getValueType()) ==
821             BooleanContent::ZeroOrNegativeOneBooleanContent) {
822       // If we're testing X < 0, then this compare isn't needed - just use X!
823       // FIXME: We're limiting to integer types here, but this should also work
824       // if we don't care about FP signed-zero. The use of SETLT with FP means
825       // that we don't care about NaNs.
826       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
827           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
828         return Op0;
829     }
830     break;
831   }
832   case ISD::SIGN_EXTEND_INREG: {
833     // If none of the extended bits are demanded, eliminate the sextinreg.
834     SDValue Op0 = Op.getOperand(0);
835     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
836     unsigned ExBits = ExVT.getScalarSizeInBits();
837     if (DemandedBits.getActiveBits() <= ExBits &&
838         shouldRemoveRedundantExtend(Op))
839       return Op0;
840     // If the input is already sign extended, just drop the extension.
841     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
842     if (NumSignBits >= (BitWidth - ExBits + 1))
843       return Op0;
844     break;
845   }
846   case ISD::ANY_EXTEND_VECTOR_INREG:
847   case ISD::SIGN_EXTEND_VECTOR_INREG:
848   case ISD::ZERO_EXTEND_VECTOR_INREG: {
849     if (VT.isScalableVector())
850       return SDValue();
851 
852     // If we only want the lowest element and none of extended bits, then we can
853     // return the bitcasted source vector.
854     SDValue Src = Op.getOperand(0);
855     EVT SrcVT = Src.getValueType();
856     EVT DstVT = Op.getValueType();
857     if (IsLE && DemandedElts == 1 &&
858         DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
859         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
860       return DAG.getBitcast(DstVT, Src);
861     }
862     break;
863   }
864   case ISD::INSERT_VECTOR_ELT: {
865     if (VT.isScalableVector())
866       return SDValue();
867 
868     // If we don't demand the inserted element, return the base vector.
869     SDValue Vec = Op.getOperand(0);
870     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
871     EVT VecVT = Vec.getValueType();
872     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
873         !DemandedElts[CIdx->getZExtValue()])
874       return Vec;
875     break;
876   }
877   case ISD::INSERT_SUBVECTOR: {
878     if (VT.isScalableVector())
879       return SDValue();
880 
881     SDValue Vec = Op.getOperand(0);
882     SDValue Sub = Op.getOperand(1);
883     uint64_t Idx = Op.getConstantOperandVal(2);
884     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
885     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
886     // If we don't demand the inserted subvector, return the base vector.
887     if (DemandedSubElts == 0)
888       return Vec;
889     break;
890   }
891   case ISD::VECTOR_SHUFFLE: {
892     assert(!VT.isScalableVector());
893     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
894 
895     // If all the demanded elts are from one operand and are inline,
896     // then we can use the operand directly.
897     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
898     for (unsigned i = 0; i != NumElts; ++i) {
899       int M = ShuffleMask[i];
900       if (M < 0 || !DemandedElts[i])
901         continue;
902       AllUndef = false;
903       IdentityLHS &= (M == (int)i);
904       IdentityRHS &= ((M - NumElts) == i);
905     }
906 
907     if (AllUndef)
908       return DAG.getUNDEF(Op.getValueType());
909     if (IdentityLHS)
910       return Op.getOperand(0);
911     if (IdentityRHS)
912       return Op.getOperand(1);
913     break;
914   }
915   default:
916     // TODO: Probably okay to remove after audit; here to reduce change size
917     // in initial enablement patch for scalable vectors
918     if (VT.isScalableVector())
919       return SDValue();
920 
921     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
922       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
923               Op, DemandedBits, DemandedElts, DAG, Depth))
924         return V;
925     break;
926   }
927   return SDValue();
928 }
929 
930 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
931     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
932     unsigned Depth) const {
933   EVT VT = Op.getValueType();
934   // Since the number of lanes in a scalable vector is unknown at compile time,
935   // we track one bit which is implicitly broadcast to all lanes.  This means
936   // that all lanes in a scalable vector are considered demanded.
937   APInt DemandedElts = VT.isFixedLengthVector()
938                            ? APInt::getAllOnes(VT.getVectorNumElements())
939                            : APInt(1, 1);
940   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
941                                          Depth);
942 }
943 
944 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
945     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
946     unsigned Depth) const {
947   APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
948   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
949                                          Depth);
950 }
951 
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// Returns the replacement node on success, or an empty SDValue if the pattern
// does not match or the resulting AVG node would not be profitable/legal.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  // Add2 records the inner ADD of an avgceil match (the one holding the +1),
  // so its overflow flags can be checked later. It stays empty for avgfloor.
  SDValue Add2;
  // Given the three leaves of a nested add and the inner add node A, succeed
  // iff either Op2 or Op3 is the +1 splat constant; the remaining two leaves
  // become the averaged operands ExtOpA/ExtOpB.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Try both nesting shapes: the inner add may be on either side of the outer
  // add. If neither contains a +1, this is the avgfloor form.
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  // Number of high bits known redundant in both operands; drives how narrow
  // the AVG type can be made below.
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // Subtract one: the add consumes one bit of headroom, so only NumSigned
  // sign bits survive the addition without overflow.
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Decide between the signed and unsigned AVG form, preferring whichever
  // gives more known high bits (and hence a narrower operation type).
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // srl of a sign-extended value is fine as long as the sign bit itself is
    // not demanded by the caller.
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    // The narrowing is only valid without overflow flags because the known
    // sign/zero bits already guarantee the result fits; at the full width we
    // instead rely on the adds being provably non-overflowing.
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create a AVGFLOOR node with a scalar constant unless its legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  // Build avg(trunc/ext(A), trunc/ext(B)) at the chosen width, then widen or
  // narrow the result back to the original type.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1093 
1094 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1095 /// result of Op are ever used downstream. If we can use this information to
1096 /// simplify Op, create a new simplified DAG node and return true, returning the
1097 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1098 /// return a mask of Known bits for the expression (used to simplify the
1099 /// caller).  The Known bits may only be accurate for those bits in the
1100 /// OriginalDemandedBits and OriginalDemandedElts.
1101 bool TargetLowering::SimplifyDemandedBits(
1102     SDValue Op, const APInt &OriginalDemandedBits,
1103     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1104     unsigned Depth, bool AssumeSingleUse) const {
1105   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1106   assert(Op.getScalarValueSizeInBits() == BitWidth &&
1107          "Mask size mismatches value type size!");
1108 
1109   // Don't know anything.
1110   Known = KnownBits(BitWidth);
1111 
1112   EVT VT = Op.getValueType();
1113   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1114   unsigned NumElts = OriginalDemandedElts.getBitWidth();
1115   assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1116          "Unexpected vector size");
1117 
1118   APInt DemandedBits = OriginalDemandedBits;
1119   APInt DemandedElts = OriginalDemandedElts;
1120   SDLoc dl(Op);
1121 
1122   // Undef operand.
1123   if (Op.isUndef())
1124     return false;
1125 
1126   // We can't simplify target constants.
1127   if (Op.getOpcode() == ISD::TargetConstant)
1128     return false;
1129 
1130   if (Op.getOpcode() == ISD::Constant) {
1131     // We know all of the bits for a constant!
1132     Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1133     return false;
1134   }
1135 
1136   if (Op.getOpcode() == ISD::ConstantFP) {
1137     // We know all of the bits for a floating point constant!
1138     Known = KnownBits::makeConstant(
1139         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1140     return false;
1141   }
1142 
1143   // Other users may use these bits.
1144   bool HasMultiUse = false;
1145   if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1146     if (Depth >= SelectionDAG::MaxRecursionDepth) {
1147       // Limit search depth.
1148       return false;
1149     }
1150     // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1151     DemandedBits = APInt::getAllOnes(BitWidth);
1152     DemandedElts = APInt::getAllOnes(NumElts);
1153     HasMultiUse = true;
1154   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1155     // Not demanding any bits/elts from Op.
1156     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1157   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1158     // Limit search depth.
1159     return false;
1160   }
1161 
1162   KnownBits Known2;
1163   switch (Op.getOpcode()) {
1164   case ISD::SCALAR_TO_VECTOR: {
1165     if (VT.isScalableVector())
1166       return false;
1167     if (!DemandedElts[0])
1168       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1169 
1170     KnownBits SrcKnown;
1171     SDValue Src = Op.getOperand(0);
1172     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1173     APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1174     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1175       return true;
1176 
1177     // Upper elements are undef, so only get the knownbits if we just demand
1178     // the bottom element.
1179     if (DemandedElts == 1)
1180       Known = SrcKnown.anyextOrTrunc(BitWidth);
1181     break;
1182   }
1183   case ISD::BUILD_VECTOR:
1184     // Collect the known bits that are shared by every demanded element.
1185     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1186     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1187     return false; // Don't fall through, will infinitely loop.
1188   case ISD::SPLAT_VECTOR: {
1189     SDValue Scl = Op.getOperand(0);
1190     APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1191     KnownBits KnownScl;
1192     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1193       return true;
1194 
1195     // Implicitly truncate the bits to match the official semantics of
1196     // SPLAT_VECTOR.
1197     Known = KnownScl.trunc(BitWidth);
1198     break;
1199   }
1200   case ISD::LOAD: {
1201     auto *LD = cast<LoadSDNode>(Op);
1202     if (getTargetConstantFromLoad(LD)) {
1203       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1204       return false; // Don't fall through, will infinitely loop.
1205     }
1206     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1207       // If this is a ZEXTLoad and we are looking at the loaded value.
1208       EVT MemVT = LD->getMemoryVT();
1209       unsigned MemBits = MemVT.getScalarSizeInBits();
1210       Known.Zero.setBitsFrom(MemBits);
1211       return false; // Don't fall through, will infinitely loop.
1212     }
1213     break;
1214   }
1215   case ISD::INSERT_VECTOR_ELT: {
1216     if (VT.isScalableVector())
1217       return false;
1218     SDValue Vec = Op.getOperand(0);
1219     SDValue Scl = Op.getOperand(1);
1220     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1221     EVT VecVT = Vec.getValueType();
1222 
1223     // If index isn't constant, assume we need all vector elements AND the
1224     // inserted element.
1225     APInt DemandedVecElts(DemandedElts);
1226     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1227       unsigned Idx = CIdx->getZExtValue();
1228       DemandedVecElts.clearBit(Idx);
1229 
1230       // Inserted element is not required.
1231       if (!DemandedElts[Idx])
1232         return TLO.CombineTo(Op, Vec);
1233     }
1234 
1235     KnownBits KnownScl;
1236     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1237     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1238     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1239       return true;
1240 
1241     Known = KnownScl.anyextOrTrunc(BitWidth);
1242 
1243     KnownBits KnownVec;
1244     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1245                              Depth + 1))
1246       return true;
1247 
1248     if (!!DemandedVecElts)
1249       Known = Known.intersectWith(KnownVec);
1250 
1251     return false;
1252   }
1253   case ISD::INSERT_SUBVECTOR: {
1254     if (VT.isScalableVector())
1255       return false;
1256     // Demand any elements from the subvector and the remainder from the src its
1257     // inserted into.
1258     SDValue Src = Op.getOperand(0);
1259     SDValue Sub = Op.getOperand(1);
1260     uint64_t Idx = Op.getConstantOperandVal(2);
1261     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1262     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1263     APInt DemandedSrcElts = DemandedElts;
1264     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1265 
1266     KnownBits KnownSub, KnownSrc;
1267     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1268                              Depth + 1))
1269       return true;
1270     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1271                              Depth + 1))
1272       return true;
1273 
1274     Known.Zero.setAllBits();
1275     Known.One.setAllBits();
1276     if (!!DemandedSubElts)
1277       Known = Known.intersectWith(KnownSub);
1278     if (!!DemandedSrcElts)
1279       Known = Known.intersectWith(KnownSrc);
1280 
1281     // Attempt to avoid multi-use src if we don't need anything from it.
1282     if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1283         !DemandedSrcElts.isAllOnes()) {
1284       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1285           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1286       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1287           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1288       if (NewSub || NewSrc) {
1289         NewSub = NewSub ? NewSub : Sub;
1290         NewSrc = NewSrc ? NewSrc : Src;
1291         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1292                                         Op.getOperand(2));
1293         return TLO.CombineTo(Op, NewOp);
1294       }
1295     }
1296     break;
1297   }
1298   case ISD::EXTRACT_SUBVECTOR: {
1299     if (VT.isScalableVector())
1300       return false;
1301     // Offset the demanded elts by the subvector index.
1302     SDValue Src = Op.getOperand(0);
1303     if (Src.getValueType().isScalableVector())
1304       break;
1305     uint64_t Idx = Op.getConstantOperandVal(1);
1306     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1307     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1308 
1309     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1310                              Depth + 1))
1311       return true;
1312 
1313     // Attempt to avoid multi-use src if we don't need anything from it.
1314     if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1315       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1316           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1317       if (DemandedSrc) {
1318         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1319                                         Op.getOperand(1));
1320         return TLO.CombineTo(Op, NewOp);
1321       }
1322     }
1323     break;
1324   }
1325   case ISD::CONCAT_VECTORS: {
1326     if (VT.isScalableVector())
1327       return false;
1328     Known.Zero.setAllBits();
1329     Known.One.setAllBits();
1330     EVT SubVT = Op.getOperand(0).getValueType();
1331     unsigned NumSubVecs = Op.getNumOperands();
1332     unsigned NumSubElts = SubVT.getVectorNumElements();
1333     for (unsigned i = 0; i != NumSubVecs; ++i) {
1334       APInt DemandedSubElts =
1335           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1336       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1337                                Known2, TLO, Depth + 1))
1338         return true;
1339       // Known bits are shared by every demanded subvector element.
1340       if (!!DemandedSubElts)
1341         Known = Known.intersectWith(Known2);
1342     }
1343     break;
1344   }
1345   case ISD::VECTOR_SHUFFLE: {
1346     assert(!VT.isScalableVector());
1347     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1348 
1349     // Collect demanded elements from shuffle operands..
1350     APInt DemandedLHS, DemandedRHS;
1351     if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1352                                 DemandedRHS))
1353       break;
1354 
1355     if (!!DemandedLHS || !!DemandedRHS) {
1356       SDValue Op0 = Op.getOperand(0);
1357       SDValue Op1 = Op.getOperand(1);
1358 
1359       Known.Zero.setAllBits();
1360       Known.One.setAllBits();
1361       if (!!DemandedLHS) {
1362         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1363                                  Depth + 1))
1364           return true;
1365         Known = Known.intersectWith(Known2);
1366       }
1367       if (!!DemandedRHS) {
1368         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1369                                  Depth + 1))
1370           return true;
1371         Known = Known.intersectWith(Known2);
1372       }
1373 
1374       // Attempt to avoid multi-use ops if we don't need anything from them.
1375       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1377       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1379       if (DemandedOp0 || DemandedOp1) {
1380         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1383         return TLO.CombineTo(Op, NewOp);
1384       }
1385     }
1386     break;
1387   }
1388   case ISD::AND: {
1389     SDValue Op0 = Op.getOperand(0);
1390     SDValue Op1 = Op.getOperand(1);
1391 
1392     // If the RHS is a constant, check to see if the LHS would be zero without
1393     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1394     // simplify the LHS, here we're using information from the LHS to simplify
1395     // the RHS.
1396     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1397       // Do not increment Depth here; that can cause an infinite loop.
1398       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1399       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1400       if ((LHSKnown.Zero & DemandedBits) ==
1401           (~RHSC->getAPIntValue() & DemandedBits))
1402         return TLO.CombineTo(Op, Op0);
1403 
1404       // If any of the set bits in the RHS are known zero on the LHS, shrink
1405       // the constant.
1406       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1407                                  DemandedElts, TLO))
1408         return true;
1409 
1410       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1411       // constant, but if this 'and' is only clearing bits that were just set by
1412       // the xor, then this 'and' can be eliminated by shrinking the mask of
1413       // the xor. For example, for a 32-bit X:
1414       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1415       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1416           LHSKnown.One == ~RHSC->getAPIntValue()) {
1417         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1418         return TLO.CombineTo(Op, Xor);
1419       }
1420     }
1421 
1422     // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1423     // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1424     if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1425         (Op0.getOperand(0).isUndef() ||
1426          ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1427         Op0->hasOneUse()) {
1428       unsigned NumSubElts =
1429           Op0.getOperand(1).getValueType().getVectorNumElements();
1430       unsigned SubIdx = Op0.getConstantOperandVal(2);
1431       APInt DemandedSub =
1432           APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1433       KnownBits KnownSubMask =
1434           TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1435       if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1436         SDValue NewAnd =
1437             TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1438         SDValue NewInsert =
1439             TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1440                             Op0.getOperand(1), Op0.getOperand(2));
1441         return TLO.CombineTo(Op, NewInsert);
1442       }
1443     }
1444 
1445     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1446                              Depth + 1))
1447       return true;
1448     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1449                              Known2, TLO, Depth + 1))
1450       return true;
1451 
1452     // If all of the demanded bits are known one on one side, return the other.
1453     // These bits cannot contribute to the result of the 'and'.
1454     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1455       return TLO.CombineTo(Op, Op0);
1456     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1457       return TLO.CombineTo(Op, Op1);
1458     // If all of the demanded bits in the inputs are known zeros, return zero.
1459     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1460       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1461     // If the RHS is a constant, see if we can simplify it.
1462     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1463                                TLO))
1464       return true;
1465     // If the operation can be done in a smaller type, do so.
1466     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1467       return true;
1468 
1469     // Attempt to avoid multi-use ops if we don't need anything from them.
1470     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1471       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1472           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1473       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1474           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1475       if (DemandedOp0 || DemandedOp1) {
1476         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1477         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1478         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1479         return TLO.CombineTo(Op, NewOp);
1480       }
1481     }
1482 
1483     Known &= Known2;
1484     break;
1485   }
1486   case ISD::OR: {
         // 'or': simplify both operands against the demanded bits, then try
         // constant shrinking, narrowing, and an (or (and ...), (and ...)) fold.
1487     SDValue Op0 = Op.getOperand(0);
1488     SDValue Op1 = Op.getOperand(1);
1489     SDNodeFlags Flags = Op.getNode()->getFlags();
1490     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1491                              Depth + 1)) {
           // A simplified operand may now overlap the other, so we can no
           // longer guarantee the operands are disjoint - drop the flag.
1492       if (Flags.hasDisjoint()) {
1493         Flags.setDisjoint(false);
1494         Op->setFlags(Flags);
1495       }
1496       return true;
1497     }
1498 
         // Bits already known one in Op1 need not be demanded from Op0 - the
         // 'or' produces them regardless of Op0's value.
1499     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1500                              Known2, TLO, Depth + 1)) {
1501       if (Flags.hasDisjoint()) {
1502         Flags.setDisjoint(false);
1503         Op->setFlags(Flags);
1504       }
1505       return true;
1506     }
1507 
1508     // If all of the demanded bits are known zero on one side, return the other.
1509     // These bits cannot contribute to the result of the 'or'.
1510     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1511       return TLO.CombineTo(Op, Op0);
1512     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1513       return TLO.CombineTo(Op, Op1);
1514     // If the RHS is a constant, see if we can simplify it.
1515     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1516       return true;
1517     // If the operation can be done in a smaller type, do so.
1518     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1519       return true;
1520 
1521     // Attempt to avoid multi-use ops if we don't need anything from them.
1522     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1523       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1524           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1525       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1526           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1527       if (DemandedOp0 || DemandedOp1) {
1528         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1529         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1530         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1531         return TLO.CombineTo(Op, NewOp);
1532       }
1533     }
1534 
1535     // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1536     // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1537     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1538         Op0->hasOneUse() && Op1->hasOneUse()) {
1539       // Attempt to match all commutations - m_c_Or would've been useful!
1540       for (int I = 0; I != 2; ++I) {
1541         SDValue X = Op.getOperand(I).getOperand(0);
1542         SDValue C1 = Op.getOperand(I).getOperand(1);
1543         SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1544         SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1545         if (Alt.getOpcode() == ISD::OR) {
1546           for (int J = 0; J != 2; ++J) {
1547             if (X == Alt.getOperand(J)) {
1548               SDValue Y = Alt.getOperand(1 - J);
1549               if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1550                                                                {C1, C2})) {
1551                 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1552                 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1553                 return TLO.CombineTo(
1554                     Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1555               }
1556             }
1557           }
1558         }
1559       }
1560     }
1561 
         // Combine the known bits of the two simplified operands for the 'or'.
1562     Known |= Known2;
1563     break;
1564   }
1565   case ISD::XOR: {
         // 'xor': simplify both operands with the full demanded mask, then try
         // folds that are specific to xor (turn into OR/AND/NOT, commute with
         // a constant shift).
1566     SDValue Op0 = Op.getOperand(0);
1567     SDValue Op1 = Op.getOperand(1);
1568 
1569     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1570                              Depth + 1))
1571       return true;
1572     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1573                              Depth + 1))
1574       return true;
1575 
1576     // If all of the demanded bits are known zero on one side, return the other.
1577     // These bits cannot contribute to the result of the 'xor'.
1578     if (DemandedBits.isSubsetOf(Known.Zero))
1579       return TLO.CombineTo(Op, Op0);
1580     if (DemandedBits.isSubsetOf(Known2.Zero))
1581       return TLO.CombineTo(Op, Op1);
1582     // If the operation can be done in a smaller type, do so.
1583     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1584       return true;
1585 
1586     // If all of the unknown bits are known to be zero on one side or the other
1587     // turn this into an *inclusive* or.
1588     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1589     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1590       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1591 
         // The constant-RHS folds below only apply when Op1 is a constant (or a
         // constant splat across the demanded vector elements).
1592     ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1593     if (C) {
1594       // If one side is a constant, and all of the set bits in the constant are
1595       // also known set on the other side, turn this into an AND, as we know
1596       // the bits will be cleared.
1597       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1598       // NB: it is okay if more bits are known than are requested
1599       if (C->getAPIntValue() == Known2.One) {
1600         SDValue ANDC =
1601             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1602         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1603       }
1604 
1605       // If the RHS is a constant, see if we can change it. Don't alter a -1
1606       // constant because that's a 'not' op, and that is better for combining
1607       // and codegen.
1608       if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1609         // We're flipping all demanded bits. Flip the undemanded bits too.
1610         SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1611         return TLO.CombineTo(Op, New);
1612       }
1613 
1614       unsigned Op0Opcode = Op0.getOpcode();
1615       if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1616         if (ConstantSDNode *ShiftC =
1617                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1618           // Don't crash on an oversized shift. We can not guarantee that a
1619           // bogus shift has been simplified to undef.
1620           if (ShiftC->getAPIntValue().ult(BitWidth)) {
1621             uint64_t ShiftAmt = ShiftC->getZExtValue();
                 // Ones = mask of bit positions that survive the inner shift.
1622             APInt Ones = APInt::getAllOnes(BitWidth);
1623             Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1624                                          : Ones.lshr(ShiftAmt);
1625             if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1626                 isDesirableToCommuteXorWithShift(Op.getNode())) {
1627               // If the xor constant is a demanded mask, do a 'not' before the
1628               // shift:
1629               // xor (X << ShiftC), XorC --> (not X) << ShiftC
1630               // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1631               SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1632               return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1633                                                        Op0.getOperand(1)));
1634             }
1635           }
1636         }
1637       }
1638     }
1639 
1640     // If we can't turn this into a 'not', try to shrink the constant.
1641     if (!C || !C->isAllOnes())
1642       if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1643         return true;
1644 
1645     // Attempt to avoid multi-use ops if we don't need anything from them.
1646     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1647       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1648           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1649       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1650           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1651       if (DemandedOp0 || DemandedOp1) {
1652         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1653         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1654         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1655         return TLO.CombineTo(Op, NewOp);
1656       }
1657     }
1658 
         // Combine the known bits of the two simplified operands for the 'xor'.
1659     Known ^= Known2;
1660     break;
1661   }
1662   case ISD::SELECT:
         // select: demand the same bits from both selected values (operands 1
         // and 2); the condition operand (0) is not simplified here.
1663     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1664                              Known, TLO, Depth + 1))
1665       return true;
1666     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1667                              Known2, TLO, Depth + 1))
1668       return true;
1669 
1670     // If the operands are constants, see if we can simplify them.
1671     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1672       return true;
1673 
1674     // Only known if known in both the LHS and RHS.
1675     Known = Known.intersectWith(Known2);
1676     break;
1677   case ISD::VSELECT:
         // vselect: same as SELECT - simplify both selected values (operands 1
         // and 2). Note no ShrinkDemandedConstant attempt here, unlike SELECT.
1678     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1679                              Known, TLO, Depth + 1))
1680       return true;
1681     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1682                              Known2, TLO, Depth + 1))
1683       return true;
1684 
1685     // Only known if known in both the LHS and RHS.
1686     Known = Known.intersectWith(Known2);
1687     break;
1688   case ISD::SELECT_CC:
         // select_cc: the selected values are operands 2 and 3; demand the
         // same bits from both. The compared operands (0/1) are untouched.
1689     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1690                              Known, TLO, Depth + 1))
1691       return true;
1692     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1693                              Known2, TLO, Depth + 1))
1694       return true;
1695 
1696     // If the operands are constants, see if we can simplify them.
1697     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1698       return true;
1699 
1700     // Only known if known in both the LHS and RHS.
1701     Known = Known.intersectWith(Known2);
1702     break;
1703   case ISD::SETCC: {
         // setcc: try to bypass the comparison entirely when only the sign bit
         // of a 0/-1 boolean result is demanded.
1704     SDValue Op0 = Op.getOperand(0);
1705     SDValue Op1 = Op.getOperand(1);
1706     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1707     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1708     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1709     // -1, we may be able to bypass the setcc.
1710     if (DemandedBits.isSignMask() &&
1711         Op0.getScalarValueSizeInBits() == BitWidth &&
1712         getBooleanContents(Op0.getValueType()) ==
1713             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1714       // If we're testing X < 0, then this compare isn't needed - just use X!
1715       // FIXME: We're limiting to integer types here, but this should also work
1716       // if we don't care about FP signed-zero. The use of SETLT with FP means
1717       // that we don't care about NaNs.
1718       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1719           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1720         return TLO.CombineTo(Op, Op0);
1721 
1722       // TODO: Should we check for other forms of sign-bit comparisons?
1723       // Examples: X <= -1, X >= 0
1724     }
         // With 0/1 booleans, every bit above bit 0 of the result is zero.
1725     if (getBooleanContents(Op0.getValueType()) ==
1726             TargetLowering::ZeroOrOneBooleanContent &&
1727         BitWidth > 1)
1728       Known.Zero.setBitsFrom(1);
1729     break;
1730   }
1731   case ISD::SHL: {
         // shl: with a known-constant shift amount the demanded mask can be
         // shifted down into the source; several shift-merging and narrowing
         // folds follow. Variable shifts only narrow the demand on high bits.
1732     SDValue Op0 = Op.getOperand(0);
1733     SDValue Op1 = Op.getOperand(1);
1734     EVT ShiftVT = Op1.getValueType();
1735 
1736     if (std::optional<uint64_t> KnownSA =
1737             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1738       unsigned ShAmt = *KnownSA;
           // A zero-amount shift is a no-op.
1739       if (ShAmt == 0)
1740         return TLO.CombineTo(Op, Op0);
1741 
1742       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1743       // single shift.  We can do this if the bottom bits (which are shifted
1744       // out) are never demanded.
1745       // TODO - support non-uniform vector amounts.
1746       if (Op0.getOpcode() == ISD::SRL) {
1747         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1748           if (std::optional<uint64_t> InnerSA =
1749                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1750             unsigned C1 = *InnerSA;
                 // The net shift is |ShAmt - C1|; its direction picks the opcode.
1751             unsigned Opc = ISD::SHL;
1752             int Diff = ShAmt - C1;
1753             if (Diff < 0) {
1754               Diff = -Diff;
1755               Opc = ISD::SRL;
1756             }
1757             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1758             return TLO.CombineTo(
1759                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1760           }
1761         }
1762       }
1763 
1764       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1765       // are not demanded. This will likely allow the anyext to be folded away.
1766       // TODO - support non-uniform vector amounts.
1767       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1768         SDValue InnerOp = Op0.getOperand(0);
1769         EVT InnerVT = InnerOp.getValueType();
1770         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1771         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1772             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1773           SDValue NarrowShl = TLO.DAG.getNode(
1774               ISD::SHL, dl, InnerVT, InnerOp,
1775               TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1776           return TLO.CombineTo(
1777               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1778         }
1779 
1780         // Repeat the SHL optimization above in cases where an extension
1781         // intervenes: (shl (anyext (shr x, c1)), c2) to
1782         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1783         // aren't demanded (as above) and that the shifted upper c1 bits of
1784         // x aren't demanded.
1785         // TODO - support non-uniform vector amounts.
1786         if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1787             InnerOp.hasOneUse()) {
1788           if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1789                   InnerOp, DemandedElts, Depth + 2)) {
1790             unsigned InnerShAmt = *SA2;
1791             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1792                 DemandedBits.getActiveBits() <=
1793                     (InnerBits - InnerShAmt + ShAmt) &&
1794                 DemandedBits.countr_zero() >= ShAmt) {
1795               SDValue NewSA =
1796                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1797               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1798                                                InnerOp.getOperand(0));
1799               return TLO.CombineTo(
1800                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1801             }
1802           }
1803         }
1804       }
1805 
           // Only the bits that survive the left shift are demanded from the
           // source, i.e. the demanded mask shifted right by ShAmt.
1806       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1807       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1808                                Depth + 1)) {
1809         SDNodeFlags Flags = Op.getNode()->getFlags();
1810         if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1811           // Disable the nsw and nuw flags. We can no longer guarantee that we
1812           // won't wrap after simplification.
1813           Flags.setNoSignedWrap(false);
1814           Flags.setNoUnsignedWrap(false);
1815           Op->setFlags(Flags);
1816         }
1817         return true;
1818       }
           // Translate the source's known bits through the shift.
1819       Known.Zero <<= ShAmt;
1820       Known.One <<= ShAmt;
1821       // low bits known zero.
1822       Known.Zero.setLowBits(ShAmt);
1823 
1824       // Attempt to avoid multi-use ops if we don't need anything from them.
1825       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1826         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1827             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1828         if (DemandedOp0) {
1829           SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1830           return TLO.CombineTo(Op, NewOp);
1831         }
1832       }
1833 
1834       // TODO: Can we merge this fold with the one below?
1835       // Try shrinking the operation as long as the shift amount will still be
1836       // in range.
1837       if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1838           Op.getNode()->hasOneUse()) {
1839         // Search for the smallest integer type with free casts to and from
1840         // Op's type. For expedience, just check power-of-2 integer types.
1841         unsigned DemandedSize = DemandedBits.getActiveBits();
1842         for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1843              SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1844           EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1845           if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1846               isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1847               isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1848               (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1849             assert(DemandedSize <= SmallVTBits &&
1850                    "Narrowed below demanded bits?");
1851             // We found a type with free casts.
1852             SDValue NarrowShl = TLO.DAG.getNode(
1853                 ISD::SHL, dl, SmallVT,
1854                 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1855                 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1856             return TLO.CombineTo(
1857                 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1858           }
1859         }
1860       }
1861 
1862       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1863       // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1864       // Only do this if we demand the upper half so the knownbits are correct.
1865       unsigned HalfWidth = BitWidth / 2;
1866       if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1867           DemandedBits.countLeadingOnes() >= HalfWidth) {
1868         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1869         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1870             isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1871             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1872             (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1873           // If we're demanding the upper bits at all, we must ensure
1874           // that the upper bits of the shift result are known to be zero,
1875           // which is equivalent to the narrow shift being NUW.
1876           if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1877             bool IsNSW = Known.countMinSignBits() > HalfWidth;
1878             SDNodeFlags Flags;
1879             Flags.setNoSignedWrap(IsNSW);
1880             Flags.setNoUnsignedWrap(IsNUW);
1881             SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1882             SDValue NewShiftAmt =
1883                 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1884             SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1885                                                NewShiftAmt, Flags);
1886             SDValue NewExt =
1887                 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1888             return TLO.CombineTo(Op, NewExt);
1889           }
1890         }
1891       }
1892     } else {
1893       // This is a variable shift, so we can't shift the demand mask by a known
1894       // amount. But if we are not demanding high bits, then we are not
1895       // demanding those bits from the pre-shifted operand either.
1896       if (unsigned CTLZ = DemandedBits.countl_zero()) {
1897         APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1898         if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1899                                  Depth + 1)) {
1900           SDNodeFlags Flags = Op.getNode()->getFlags();
1901           if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1902             // Disable the nsw and nuw flags. We can no longer guarantee that we
1903             // won't wrap after simplification.
1904             Flags.setNoSignedWrap(false);
1905             Flags.setNoUnsignedWrap(false);
1906             Op->setFlags(Flags);
1907           }
1908           return true;
1909         }
             // The shift amount is unknown, so the source's known bits don't
             // translate to known bits of the result.
1910         Known.resetAll();
1911       }
1912     }
1913 
1914     // If we are only demanding sign bits then we can use the shift source
1915     // directly.
1916     if (std::optional<uint64_t> MaxSA =
1917             TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1918       unsigned ShAmt = *MaxSA;
1919       unsigned NumSignBits =
1920           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1921       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1922       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1923         return TLO.CombineTo(Op, Op0);
1924     }
1925     break;
1926   }
1927   case ISD::SRL: {
         // srl: with a known-constant shift amount the demanded mask is shifted
         // up into the source; shift-merging and half-width narrowing follow.
1928     SDValue Op0 = Op.getOperand(0);
1929     SDValue Op1 = Op.getOperand(1);
1930     EVT ShiftVT = Op1.getValueType();
1931 
1932     if (std::optional<uint64_t> KnownSA =
1933             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1934       unsigned ShAmt = *KnownSA;
           // A zero-amount shift is a no-op.
1935       if (ShAmt == 0)
1936         return TLO.CombineTo(Op, Op0);
1937 
1938       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1939       // single shift.  We can do this if the top bits (which are shifted out)
1940       // are never demanded.
1941       // TODO - support non-uniform vector amounts.
1942       if (Op0.getOpcode() == ISD::SHL) {
1943         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1944           if (std::optional<uint64_t> InnerSA =
1945                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1946             unsigned C1 = *InnerSA;
                 // The net shift is |ShAmt - C1|; its direction picks the opcode.
1947             unsigned Opc = ISD::SRL;
1948             int Diff = ShAmt - C1;
1949             if (Diff < 0) {
1950               Diff = -Diff;
1951               Opc = ISD::SHL;
1952             }
1953             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1954             return TLO.CombineTo(
1955                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1956           }
1957         }
1958       }
1959 
1960       // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1961       // single sra. We can do this if the top bits are never demanded.
1962       if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1963         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1964           if (std::optional<uint64_t> InnerSA =
1965                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1966             unsigned C1 = *InnerSA;
1967             // Clamp the combined shift amount if it exceeds the bit width.
1968             unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1969             SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1970             return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1971                                                      Op0.getOperand(0), NewSA));
1972           }
1973         }
1974       }
1975 
           // The demanded bits come from correspondingly higher bits of the
           // source, i.e. the demanded mask shifted left by ShAmt.
1976       APInt InDemandedMask = (DemandedBits << ShAmt);
1977 
1978       // If the shift is exact, then it does demand the low bits (and knows that
1979       // they are zero).
1980       if (Op->getFlags().hasExact())
1981         InDemandedMask.setLowBits(ShAmt);
1982 
1983       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1984       // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1985       if ((BitWidth % 2) == 0 && !VT.isVector()) {
1986         APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
1987         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1988         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1989             isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1990             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1991             (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1992             ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1993              TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1994           SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1995           SDValue NewShiftAmt =
1996               TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1997           SDValue NewShift =
1998               TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1999           return TLO.CombineTo(
2000               Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2001         }
2002       }
2003 
2004       // Compute the new bits that are at the top now.
2005       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2006                                Depth + 1))
2007         return true;
           // Translate the source's known bits through the shift.
2008       Known.Zero.lshrInPlace(ShAmt);
2009       Known.One.lshrInPlace(ShAmt);
2010       // High bits known zero.
2011       Known.Zero.setHighBits(ShAmt);
2012 
2013       // Attempt to avoid multi-use ops if we don't need anything from them.
2014       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2015         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2016             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2017         if (DemandedOp0) {
2018           SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2019           return TLO.CombineTo(Op, NewOp);
2020         }
2021       }
2022     } else {
2023       // Use generic knownbits computation as it has support for non-uniform
2024       // shift amounts.
2025       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2026     }
2027 
2028     // Try to match AVG patterns (after shift simplification).
2029     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2030                                         DemandedElts, Depth + 1))
2031       return TLO.CombineTo(Op, AVG);
2032 
2033     break;
2034   }
2035   case ISD::SRA: {
         // sra: first try folds that apply even with a variable shift amount,
         // then constant-amount folds (sext_inreg, conversion to srl).
2036     SDValue Op0 = Op.getOperand(0);
2037     SDValue Op1 = Op.getOperand(1);
2038     EVT ShiftVT = Op1.getValueType();
2039 
2040     // If we only want bits that already match the signbit then we don't need
2041     // to shift.
2042     unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2043     if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2044         NumHiDemandedBits)
2045       return TLO.CombineTo(Op, Op0);
2046 
2047     // If this is an arithmetic shift right and only the low-bit is set, we can
2048     // always convert this into a logical shr, even if the shift amount is
2049     // variable.  The low bit of the shift cannot be an input sign bit unless
2050     // the shift amount is >= the size of the datatype, which is undefined.
2051     if (DemandedBits.isOne())
2052       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2053 
2054     if (std::optional<uint64_t> KnownSA =
2055             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2055       unsigned ShAmt = *KnownSA;
           // A zero-amount shift is a no-op.
2057       if (ShAmt == 0)
2058         return TLO.CombineTo(Op, Op0);
2059 
2060       // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2061       // supports sext_inreg.
2062       if (Op0.getOpcode() == ISD::SHL) {
2063         if (std::optional<uint64_t> InnerSA =
2064                 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2065           unsigned LowBits = BitWidth - ShAmt;
2066           EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2067           if (VT.isVector())
2068             ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2069                                      VT.getVectorElementCount());
2070 
2071           if (*InnerSA == ShAmt) {
2072             if (!TLO.LegalOperations() ||
2073                 getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
2074               return TLO.CombineTo(
2075                   Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2076                                       Op0.getOperand(0),
2077                                       TLO.DAG.getValueType(ExtVT)));
2078 
2079             // Even if we can't convert to sext_inreg, we might be able to
2080             // remove this shift pair if the input is already sign extended.
2081             unsigned NumSignBits =
2082                 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2083             if (NumSignBits > ShAmt)
2084               return TLO.CombineTo(Op, Op0.getOperand(0));
2085           }
2086         }
2087       }
2088 
           // The demanded bits come from correspondingly higher bits of the
           // source, i.e. the demanded mask shifted left by ShAmt.
2089       APInt InDemandedMask = (DemandedBits << ShAmt);
2090 
2091       // If the shift is exact, then it does demand the low bits (and knows that
2092       // they are zero).
2093       if (Op->getFlags().hasExact())
2094         InDemandedMask.setLowBits(ShAmt);
2095 
2096       // If any of the demanded bits are produced by the sign extension, we also
2097       // demand the input sign bit.
2098       if (DemandedBits.countl_zero() < ShAmt)
2099         InDemandedMask.setSignBit();
2100 
2101       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2102                                Depth + 1))
2103         return true;
           // Translate the source's known bits through the shift.
2104       Known.Zero.lshrInPlace(ShAmt);
2105       Known.One.lshrInPlace(ShAmt);
2106 
2107       // If the input sign bit is known to be zero, or if none of the top bits
2108       // are demanded, turn this into an unsigned shift right.
2109       if (Known.Zero[BitWidth - ShAmt - 1] ||
2110           DemandedBits.countl_zero() >= ShAmt) {
2111         SDNodeFlags Flags;
2112         Flags.setExact(Op->getFlags().hasExact());
2113         return TLO.CombineTo(
2114             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2115       }
2116 
           // If exactly one bit is demanded, that bit is a copy of the sign
           // bit, so extract it with a logical shift instead.
2117       int Log2 = DemandedBits.exactLogBase2();
2118       if (Log2 >= 0) {
2119         // The bit must come from the sign.
2120         SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2121         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2122       }
2123 
2124       if (Known.One[BitWidth - ShAmt - 1])
2125         // New bits are known one.
2126         Known.One.setHighBits(ShAmt);
2127 
2128       // Attempt to avoid multi-use ops if we don't need anything from them.
2129       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2130         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2131             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2132         if (DemandedOp0) {
2133           SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2134           return TLO.CombineTo(Op, NewOp);
2135         }
2136       }
2137     }
2138 
2139     // Try to match AVG patterns (after shift simplification).
2140     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2141                                         DemandedElts, Depth + 1))
2142       return TLO.CombineTo(Op, AVG);
2143 
2144     break;
2145   }
2146   case ISD::FSHL:
2147   case ISD::FSHR: {
    // Funnel shift: the result is drawn from the concatenation Op0:Op1 shifted
    // by Op2. With a constant shift amount the demanded result bits map onto
    // independent demanded-bit masks for Op0 and Op1.
2148     SDValue Op0 = Op.getOperand(0);
2149     SDValue Op1 = Op.getOperand(1);
2150     SDValue Op2 = Op.getOperand(2);
2151     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2152 
2153     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2154       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2155 
2156       // For fshl, 0-shift returns the 1st arg.
2157       // For fshr, 0-shift returns the 2nd arg.
2158       if (Amt == 0) {
2159         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2160                                  Known, TLO, Depth + 1))
2161           return true;
2162         break;
2163       }
2164 
2165       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2166       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2167       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2168       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2169       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2170                                Depth + 1))
2171         return true;
2172       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2173                                Depth + 1))
2174         return true;
2175 
      // Shift each operand's known bits into their final result positions and
      // merge; the two shifted halves contribute disjoint bits of the result.
2176       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2177       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2178       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2179       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2180       Known = Known.unionWith(Known2);
2181 
2182       // Attempt to avoid multi-use ops if we don't need anything from them.
2183       if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2184           !DemandedElts.isAllOnes()) {
2185         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2186             Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2187         SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2188             Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2189         if (DemandedOp0 || DemandedOp1) {
2190           DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2191           DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2192           SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2193                                           DemandedOp1, Op2);
2194           return TLO.CombineTo(Op, NewOp);
2195         }
2196       }
2197     }
2198 
2199     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2200     if (isPowerOf2_32(BitWidth)) {
2201       APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2202       if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2203                                Known2, TLO, Depth + 1))
2204         return true;
2205     }
2206     break;
2207   }
2208   case ISD::ROTL:
2209   case ISD::ROTR: {
    // Rotate: like a funnel shift where both inputs are Op0.
2210     SDValue Op0 = Op.getOperand(0);
2211     SDValue Op1 = Op.getOperand(1);
2212     bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2213 
2214     // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2215     if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2216       return TLO.CombineTo(Op, Op0);
2217 
2218     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2219       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2220       unsigned RevAmt = BitWidth - Amt;
2221 
2222       // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2223       // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2224       APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2225       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2226                                Depth + 1))
2227         return true;
2228 
2229       // rot*(x, 0) --> x
2230       if (Amt == 0)
2231         return TLO.CombineTo(Op, Op0);
2232 
2233       // See if we don't demand either half of the rotated bits.
      // All demanded bits above the wrap point -> only the left-shifted half
      // matters, so a plain SHL suffices.
2234       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2235           DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2236         Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2237         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2238       }
      // All demanded bits below the wrap point -> a plain SRL suffices.
2239       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2240           DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2241         Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2242         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2243       }
2244     }
2245 
2246     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2247     if (isPowerOf2_32(BitWidth)) {
2248       APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2249       if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2250                                Depth + 1))
2251         return true;
2252     }
2253     break;
2254   }
2255   case ISD::SMIN:
2256   case ISD::SMAX:
2257   case ISD::UMIN:
2258   case ISD::UMAX: {
    // Integer min/max: fold to a logic op when only sign-duplicated bits are
    // demanded, or to one operand when known bits prove the ordering.
2259     unsigned Opc = Op.getOpcode();
2260     SDValue Op0 = Op.getOperand(0);
2261     SDValue Op1 = Op.getOperand(1);
2262 
2263     // If we're only demanding signbits, then we can simplify to OR/AND node.
    // On all-signbit values (each operand is 0 or -1 over the demanded span)
    // smin/umax behaves as OR and smax/umin behaves as AND.
2264     unsigned BitOp =
2265         (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2266     unsigned NumSignBits =
2267         std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2268                  TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2269     unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2270     if (NumSignBits >= NumDemandedUpperBits)
2271       return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2272 
2273     // Check if one arg is always less/greater than (or equal) to the other arg.
2274     KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2275     KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2276     switch (Opc) {
2277     case ISD::SMIN:
2278       if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2279         return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2280       if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2281         return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2282       Known = KnownBits::smin(Known0, Known1);
2283       break;
2284     case ISD::SMAX:
2285       if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2286         return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2287       if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2288         return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2289       Known = KnownBits::smax(Known0, Known1);
2290       break;
2291     case ISD::UMIN:
2292       if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2293         return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2294       if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2295         return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2296       Known = KnownBits::umin(Known0, Known1);
2297       break;
2298     case ISD::UMAX:
2299       if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2300         return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2301       if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2302         return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2303       Known = KnownBits::umax(Known0, Known1);
2304       break;
2305     }
2306     break;
2307   }
2308   case ISD::BITREVERSE: {
    // The demanded bits of the source are the demanded result bits mirrored;
    // known bits mirror back the same way.
2309     SDValue Src = Op.getOperand(0);
2310     APInt DemandedSrcBits = DemandedBits.reverseBits();
2311     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2312                              Depth + 1))
2313       return true;
2314     Known.One = Known2.One.reverseBits();
2315     Known.Zero = Known2.Zero.reverseBits();
2316     break;
2317   }
2318   case ISD::BSWAP: {
    // Byte swap: if only a single byte of the result is live, replace the
    // bswap with one shift; otherwise byte-swap the demanded mask through.
2319     SDValue Src = Op.getOperand(0);
2320 
2321     // If the only bits demanded come from one byte of the bswap result,
2322     // just shift the input byte into position to eliminate the bswap.
2323     unsigned NLZ = DemandedBits.countl_zero();
2324     unsigned NTZ = DemandedBits.countr_zero();
2325 
2326     // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
2327     // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
2328     // have 14 leading zeros, round to 8.
2329     NLZ = alignDown(NLZ, 8);
2330     NTZ = alignDown(NTZ, 8);
2331     // If we need exactly one byte, we can do this transformation.
2332     if (BitWidth - NLZ - NTZ == 8) {
2333       // Replace this with either a left or right shift to get the byte into
2334       // the right place.
      // The shift direction/amount move the mirrored source byte into the
      // demanded byte position.
2335       unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2336       if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2337         unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2338         SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2339         SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2340         return TLO.CombineTo(Op, NewOp);
2341       }
2342     }
2343 
2344     APInt DemandedSrcBits = DemandedBits.byteSwap();
2345     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2346                              Depth + 1))
2347       return true;
2348     Known.One = Known2.One.byteSwap();
2349     Known.Zero = Known2.Zero.byteSwap();
2350     break;
2351   }
2352   case ISD::CTPOP: {
2353     // If only 1 bit is demanded, replace with PARITY as long as we're before
2354     // op legalization.
    // (The low bit of a population count equals the parity of the input.)
2355     // FIXME: Limit to scalars for now.
2356     if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2357       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2358                                                Op.getOperand(0)));
2359 
2360     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2361     break;
2362   }
2363   case ISD::SIGN_EXTEND_INREG: {
    // In-register sign extension: the low ExVTBits of Op0 are sign-extended
    // to fill the full VT width.
2364     SDValue Op0 = Op.getOperand(0);
2365     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2366     unsigned ExVTBits = ExVT.getScalarSizeInBits();
2367 
2368     // If we only care about the highest bit, don't bother shifting right.
    // SHL by (BitWidth - ExVTBits) moves the ExVT sign bit into the MSB.
2369     if (DemandedBits.isSignMask()) {
2370       unsigned MinSignedBits =
2371           TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2372       bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2373       // However if the input is already sign extended we expect the sign
2374       // extension to be dropped altogether later and do not simplify.
2375       if (!AlreadySignExtended) {
2376         // Compute the correct shift amount type, which must be getShiftAmountTy
2377         // for scalar types after legalization.
2378         SDValue ShiftAmt =
2379             TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2380         return TLO.CombineTo(Op,
2381                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2382       }
2383     }
2384 
2385     // If none of the extended bits are demanded, eliminate the sextinreg.
2386     if (DemandedBits.getActiveBits() <= ExVTBits)
2387       return TLO.CombineTo(Op, Op0);
2388 
2389     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2390 
2391     // Since the sign extended bits are demanded, we know that the sign
2392     // bit is demanded.
2393     InputDemandedBits.setBit(ExVTBits - 1);
2394 
2395     if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2396                              Depth + 1))
2397       return true;
2398 
2399     // If the sign bit of the input is known set or clear, then we know the
2400     // top bits of the result.
2401 
2402     // If the input sign bit is known zero, convert this into a zero extension.
2403     if (Known.Zero[ExVTBits - 1])
2404       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT))
2405 
2406     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2407     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2408       Known.One.setBitsFrom(ExVTBits);
2409       Known.Zero &= Mask;
2410     } else { // Input sign bit unknown
      // Only the in-register bits of the known state remain valid.
2411       Known.Zero &= Mask;
2412       Known.One &= Mask;
2413     }
2414     break;
2415   }
2416   case ISD::BUILD_PAIR: {
    // BUILD_PAIR concatenates operand 1 (high half) : operand 0 (low half);
    // split the demanded mask across the two halves.
2417     EVT HalfVT = Op.getOperand(0).getValueType();
2418     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2419 
2420     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2421     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2422 
2423     KnownBits KnownLo, KnownHi;
2424 
2425     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2426       return true;
2427 
2428     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2429       return true;
2430 
2431     Known = KnownHi.concat(KnownLo);
2432     break;
2433   }
2434   case ISD::ZERO_EXTEND_VECTOR_INREG:
2435     if (VT.isScalableVector())
2436       return false;
2437     [[fallthrough]];
2438   case ISD::ZERO_EXTEND: {
    // Zero extension: only the low InBits of the source matter; the high
    // result bits are known zero.
2439     SDValue Src = Op.getOperand(0);
2440     EVT SrcVT = Src.getValueType();
2441     unsigned InBits = SrcVT.getScalarSizeInBits();
2442     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2443     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2444 
2445     // If none of the top bits are demanded, convert this into an any_extend.
2446     if (DemandedBits.getActiveBits() <= InBits) {
2447       // If we only need the non-extended bits of the bottom element
2448       // then we can just bitcast to the result.
2449       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2450           VT.getSizeInBits() == SrcVT.getSizeInBits())
2451         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2452 
2453       unsigned Opc =
2454           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2455       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2456         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2457     }
2458 
2459     SDNodeFlags Flags = Op->getFlags();
2460     APInt InDemandedBits = DemandedBits.trunc(InBits);
2461     APInt InDemandedElts = DemandedElts.zext(InElts);
2462     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2463                              Depth + 1)) {
      // The source was simplified, so we can no longer guarantee it is
      // non-negative; drop the flag.
2464       if (Flags.hasNonNeg()) {
2465         Flags.setNonNeg(false);
2466         Op->setFlags(Flags);
2467       }
2468       return true;
2469     }
2470     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2471     Known = Known.zext(BitWidth);
2472 
2473     // Attempt to avoid multi-use ops if we don't need anything from them.
2474     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2475             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2476       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2477     break;
2478   }
2479   case ISD::SIGN_EXTEND_VECTOR_INREG:
2480     if (VT.isScalableVector())
2481       return false;
2482     [[fallthrough]];
2483   case ISD::SIGN_EXTEND: {
    // Sign extension: the high result bits replicate the source sign bit, so
    // demanding any of them implicitly demands the source sign bit.
2484     SDValue Src = Op.getOperand(0);
2485     EVT SrcVT = Src.getValueType();
2486     unsigned InBits = SrcVT.getScalarSizeInBits();
2487     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2488     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2489 
2490     APInt InDemandedElts = DemandedElts.zext(InElts);
2491     APInt InDemandedBits = DemandedBits.trunc(InBits);
2492 
2493     // Since some of the sign extended bits are demanded, we know that the sign
2494     // bit is demanded.
2495     InDemandedBits.setBit(InBits - 1);
2496 
2497     // If none of the top bits are demanded, convert this into an any_extend.
2498     if (DemandedBits.getActiveBits() <= InBits) {
2499       // If we only need the non-extended bits of the bottom element
2500       // then we can just bitcast to the result.
2501       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2502           VT.getSizeInBits() == SrcVT.getSizeInBits())
2503         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2504 
2505       // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2506       if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2507           TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2508               InBits) {
2509         unsigned Opc =
2510             IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2511         if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2512           return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2513       }
2514     }
2515 
2516     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2517                              Depth + 1))
2518       return true;
2519     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2520 
2521     // If the sign bit is known one, the top bits match.
2522     Known = Known.sext(BitWidth);
2523 
2524     // If the sign bit is known zero, convert this to a zero extend.
2525     if (Known.isNonNegative()) {
2526       unsigned Opc =
2527           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2528       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2529         SDNodeFlags Flags;
        // Record that the zext source is known non-negative.
2530         if (!IsVecInReg)
2531           Flags.setNonNeg(true);
2532         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2533       }
2534     }
2535 
2536     // Attempt to avoid multi-use ops if we don't need anything from them.
2537     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2538             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2539       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2540     break;
2541   }
2542   case ISD::ANY_EXTEND_VECTOR_INREG:
2543     if (VT.isScalableVector())
2544       return false;
2545     [[fallthrough]];
2546   case ISD::ANY_EXTEND: {
    // Any-extension: only the low InBits of the source matter and nothing is
    // known about the extended bits.
2547     SDValue Src = Op.getOperand(0);
2548     EVT SrcVT = Src.getValueType();
2549     unsigned InBits = SrcVT.getScalarSizeInBits();
2550     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2551     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2552 
2553     // If we only need the bottom element then we can just bitcast.
2554     // TODO: Handle ANY_EXTEND?
2555     if (IsLE && IsVecInReg && DemandedElts == 1 &&
2556         VT.getSizeInBits() == SrcVT.getSizeInBits())
2557       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2558 
2559     APInt InDemandedBits = DemandedBits.trunc(InBits);
2560     APInt InDemandedElts = DemandedElts.zext(InElts);
2561     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2562                              Depth + 1))
2563       return true;
2564     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2565     Known = Known.anyext(BitWidth);
2566 
2567     // Attempt to avoid multi-use ops if we don't need anything from them.
2568     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2569             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2570       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2571     break;
2572   }
2573   case ISD::TRUNCATE: {
    // Truncate: extend the demanded mask to the source width (the dropped
    // high source bits are simply not demanded).
2574     SDValue Src = Op.getOperand(0);
2575 
2576     // Simplify the input, using demanded bit information, and compute the known
2577     // zero/one bits live out.
2578     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2579     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2580     if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2581                              Depth + 1))
2582       return true;
2583     Known = Known.trunc(BitWidth);
2584 
2585     // Attempt to avoid multi-use ops if we don't need anything from them.
2586     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2587             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2588       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2589 
2590     // If the input is only used by this truncate, see if we can shrink it based
2591     // on the known demanded bits.
2592     switch (Src.getOpcode()) {
2593     default:
2594       break;
2595     case ISD::SRL:
2596       // Shrink SRL by a constant if none of the high bits shifted in are
2597       // demanded.
2598       if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2599         // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2600         // undesirable.
2601         break;
2602 
2603       if (Src.getNode()->hasOneUse()) {
2604         if (isTruncateFree(Src, VT) &&
2605             !isTruncateFree(Src.getValueType(), VT)) {
2606           // If truncate is only free at trunc(srl), do not turn it into
2607           // srl(trunc). The check is done by first check the truncate is free
2608           // at Src's opcode(srl), then check the truncate is not done by
2609           // referencing sub-register. In test, if both trunc(srl) and
2610           // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2611           // trunc(srl)'s trunc is free, trunc(srl) is better.
2612           break;
2613         }
2614 
2615         std::optional<uint64_t> ShAmtC =
2616             TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2617         if (!ShAmtC || *ShAmtC >= BitWidth)
2618           break;
2619         uint64_t ShVal = *ShAmtC;
2620 
        // HighBits = the source bits that the narrower SRL would lose,
        // positioned where they land in the truncated result.
2621         APInt HighBits =
2622             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2623         HighBits.lshrInPlace(ShVal);
2624         HighBits = HighBits.trunc(BitWidth);
2625         if (!(HighBits & DemandedBits)) {
2626           // None of the shifted in bits are needed.  Add a truncate of the
2627           // shift input, then shift it.
2628           SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2629           SDValue NewTrunc =
2630               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2631           return TLO.CombineTo(
2632               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2633         }
2634       }
2635       break;
2636     }
2637 
2638     break;
2639   }
2640   case ISD::AssertZext: {
2641     // AssertZext demands all of the high bits, plus any of the low bits
2642     // demanded by its users.
2643     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2644     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2645     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2646                              TLO, Depth + 1))
2647       return true;
2648 
    // The bits above ZVT's width are asserted zero, so mark them known zero
    // and clear any conflicting known-one bits.
2649     Known.Zero |= ~InMask;
2650     Known.One &= (~Known.Zero);
2651     break;
2652   }
2653   case ISD::EXTRACT_VECTOR_ELT: {
    // Scalar extract from a vector: narrow the demanded elements to the
    // (constant) index when possible, and the demanded bits to the element
    // width.
2654     SDValue Src = Op.getOperand(0);
2655     SDValue Idx = Op.getOperand(1);
2656     ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2657     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2658 
    // Bail on scalable vectors - the fixed element count below is undefined.
2659     if (SrcEltCnt.isScalable())
2660       return false;
2661 
2662     // Demand the bits from every vector element without a constant index.
2663     unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2664     APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2665     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2666       if (CIdx->getAPIntValue().ult(NumSrcElts))
2667         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2668 
2669     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2670     // anything about the extended bits.
2671     APInt DemandedSrcBits = DemandedBits;
2672     if (BitWidth > EltBitWidth)
2673       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2674 
2675     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2676                              Depth + 1))
2677       return true;
2678 
2679     // Attempt to avoid multi-use ops if we don't need anything from them.
2680     if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2681       if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2682               Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2683         SDValue NewOp =
2684             TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2685         return TLO.CombineTo(Op, NewOp);
2686       }
2687     }
2688 
2689     Known = Known2;
2690     if (BitWidth > EltBitWidth)
2691       Known = Known.anyext(BitWidth);
2692     break;
2693   }
2694   case ISD::BITCAST: {
    // Bitcast: translate the demanded bits/elements through the change of
    // element granularity, and special-case FP sign-bit extraction.
2695     if (VT.isScalableVector())
2696       return false;
2697     SDValue Src = Op.getOperand(0);
2698     EVT SrcVT = Src.getValueType();
2699     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2700 
2701     // If this is an FP->Int bitcast and if the sign bit is the only
2702     // thing demanded, turn this into a FGETSIGN.
2703     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2704         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2705         SrcVT.isFloatingPoint()) {
2706       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2707       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2708       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2709           SrcVT != MVT::f128) {
2710         // Cannot eliminate/lower SHL for f128 yet.
2711         EVT Ty = OpVTLegal ? VT : MVT::i32;
2712         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2713         // place.  We expect the SHL to be eliminated by other optimizations.
2714         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2715         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2716         if (!OpVTLegal && OpVTSizeInBits > 32)
2717           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2718         unsigned ShVal = Op.getValueSizeInBits() - 1;
2719         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2720         return TLO.CombineTo(Op,
2721                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2722       }
2723     }
2724 
2725     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2726     // Demand the elt/bit if any of the original elts/bits are demanded.
    // Case 1: result elements are wider - each result element covers Scale
    // consecutive source elements.
2727     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2728       unsigned Scale = BitWidth / NumSrcEltBits;
2729       unsigned NumSrcElts = SrcVT.getVectorNumElements();
2730       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2731       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2732       for (unsigned i = 0; i != Scale; ++i) {
2733         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2734         unsigned BitOffset = EltOffset * NumSrcEltBits;
2735         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2736         if (!Sub.isZero()) {
2737           DemandedSrcBits |= Sub;
2738           for (unsigned j = 0; j != NumElts; ++j)
2739             if (DemandedElts[j])
2740               DemandedSrcElts.setBit((j * Scale) + i);
2741         }
2742       }
2743 
2744       APInt KnownSrcUndef, KnownSrcZero;
2745       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2746                                      KnownSrcZero, TLO, Depth + 1))
2747         return true;
2748 
2749       KnownBits KnownSrcBits;
2750       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2751                                KnownSrcBits, TLO, Depth + 1))
2752         return true;
    // Case 2: result elements are narrower - Scale result elements pack into
    // each source element (little-endian only for now).
2753     } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2754       // TODO - bigendian once we have test coverage.
2755       unsigned Scale = NumSrcEltBits / BitWidth;
2756       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2757       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2758       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2759       for (unsigned i = 0; i != NumElts; ++i)
2760         if (DemandedElts[i]) {
2761           unsigned Offset = (i % Scale) * BitWidth;
2762           DemandedSrcBits.insertBits(DemandedBits, Offset);
2763           DemandedSrcElts.setBit(i / Scale);
2764         }
2765 
2766       if (SrcVT.isVector()) {
2767         APInt KnownSrcUndef, KnownSrcZero;
2768         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2769                                        KnownSrcZero, TLO, Depth + 1))
2770           return true;
2771       }
2772 
2773       KnownBits KnownSrcBits;
2774       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2775                                KnownSrcBits, TLO, Depth + 1))
2776         return true;
2777 
2778       // Attempt to avoid multi-use ops if we don't need anything from them.
2779       if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2780         if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2781                 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2782           SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2783           return TLO.CombineTo(Op, NewOp);
2784         }
2785       }
2786     }
2787 
2788     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2789     // recursive call where Known may be useful to the caller.
2790     if (Depth > 0) {
2791       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2792       return false;
2793     }
2794     break;
2795   }
2796   case ISD::MUL:
    // Multiply: exploit low-bit structure when a single bit or only the two
    // low bits are demanded, then fall through to the shared ADD/SUB/MUL
    // narrowing logic.
2797     if (DemandedBits.isPowerOf2()) {
2798       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2799       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2800       // odd (has LSB set), then the left-shifted low bit of X is the answer.
2801       unsigned CTZ = DemandedBits.countr_zero();
2802       ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2803       if (C && C->getAPIntValue().countr_zero() == CTZ) {
2804         SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2805         SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2806         return TLO.CombineTo(Op, Shl);
2807       }
2808     }
2809     // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2810     // X * X is odd iff X is odd.
2811     // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2812     if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2813       SDValue One = TLO.DAG.getConstant(1, dl, VT);
2814       SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2815       return TLO.CombineTo(Op, And1);
2816     }
2817     [[fallthrough]];
2818   case ISD::ADD:
2819   case ISD::SUB: {
2820     // Add, Sub, and Mul don't demand any bits in positions beyond that
2821     // of the highest bit demanded of them.
2822     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2823     SDNodeFlags Flags = Op.getNode()->getFlags();
2824     unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2825     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2826     KnownBits KnownOp0, KnownOp1;
2827     auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2828                                       const KnownBits &KnownRHS) {
2829       if (Op.getOpcode() == ISD::MUL)
2830         Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2831       return Demanded;
2832     };
2833     if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2834                              Depth + 1) ||
2835         SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2836                              DemandedElts, KnownOp0, TLO, Depth + 1) ||
2837         // See if the operation should be performed at a smaller bit width.
2838         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2839       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2840         // Disable the nsw and nuw flags. We can no longer guarantee that we
2841         // won't wrap after simplification.
2842         Flags.setNoSignedWrap(false);
2843         Flags.setNoUnsignedWrap(false);
2844         Op->setFlags(Flags);
2845       }
2846       return true;
2847     }
2848 
2849     // neg x with only low bit demanded is simply x.
2850     if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2851         isNullConstant(Op0))
2852       return TLO.CombineTo(Op, Op1);
2853 
2854     // Attempt to avoid multi-use ops if we don't need anything from them.
2855     if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2856       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2857           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2858       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2859           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2860       if (DemandedOp0 || DemandedOp1) {
2861         Flags.setNoSignedWrap(false);
2862         Flags.setNoUnsignedWrap(false);
2863         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2864         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2865         SDValue NewOp =
2866             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2867         return TLO.CombineTo(Op, NewOp);
2868       }
2869     }
2870 
2871     // If we have a constant operand, we may be able to turn it into -1 if we
2872     // do not demand the high bits. This can make the constant smaller to
2873     // encode, allow more general folding, or match specialized instruction
2874     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2875     // is probably not useful (and could be detrimental).
2876     ConstantSDNode *C = isConstOrConstSplat(Op1);
2877     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2878     if (C && !C->isAllOnes() && !C->isOne() &&
2879         (C->getAPIntValue() | HighMask).isAllOnes()) {
2880       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2881       // Disable the nsw and nuw flags. We can no longer guarantee that we
2882       // won't wrap after simplification.
2883       Flags.setNoSignedWrap(false);
2884       Flags.setNoUnsignedWrap(false);
2885       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2886       return TLO.CombineTo(Op, NewOp);
2887     }
2888 
2889     // Match a multiply with a disguised negated-power-of-2 and convert to a
2890     // an equivalent shift-left amount.
2891     // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2892     auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2893       if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2894         return 0;
2895 
2896       // Don't touch opaque constants. Also, ignore zero and power-of-2
2897       // multiplies. Those will get folded later.
2898       ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2899       if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2900           !MulC->getAPIntValue().isPowerOf2()) {
2901         APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2902         if (UnmaskedC.isNegatedPowerOf2())
2903           return (-UnmaskedC).logBase2();
2904       }
2905       return 0;
2906     };
2907 
2908     auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2909                        unsigned ShlAmt) {
2910       SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2911       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2912       SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2913       return TLO.CombineTo(Op, Res);
2914     };
2915 
2916     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2917       if (Op.getOpcode() == ISD::ADD) {
2918         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2919         if (unsigned ShAmt = getShiftLeftAmt(Op0))
2920           return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2921         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2922         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2923           return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2924       }
2925       if (Op.getOpcode() == ISD::SUB) {
2926         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2927         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2928           return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2929       }
2930     }
2931 
2932     if (Op.getOpcode() == ISD::MUL) {
2933       Known = KnownBits::mul(KnownOp0, KnownOp1);
2934     } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2935       Known = KnownBits::computeForAddSub(
2936           Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2937           Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2938     }
2939     break;
2940   }
2941   default:
2942     // We also ask the target about intrinsics (which could be specific to it).
2943     if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2944         Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2945       // TODO: Probably okay to remove after audit; here to reduce change size
2946       // in initial enablement patch for scalable vectors
2947       if (Op.getValueType().isScalableVector())
2948         break;
2949       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2950                                             Known, TLO, Depth))
2951         return true;
2952       break;
2953     }
2954 
2955     // Just use computeKnownBits to compute output bits.
2956     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2957     break;
2958   }
2959 
2960   // If we know the value of all of the demanded bits, return this as a
2961   // constant.
2962   if (!isTargetCanonicalConstantNode(Op) &&
2963       DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2964     // Avoid folding to a constant if any OpaqueConstant is involved.
2965     const SDNode *N = Op.getNode();
2966     for (SDNode *Op :
2967          llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2968       if (auto *C = dyn_cast<ConstantSDNode>(Op))
2969         if (C->isOpaque())
2970           return false;
2971     }
2972     if (VT.isInteger())
2973       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2974     if (VT.isFloatingPoint())
2975       return TLO.CombineTo(
2976           Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2977                                     dl, VT));
2978   }
2979 
2980   // A multi use 'all demanded elts' simplify failed to find any knownbits.
2981   // Try again just for the original demanded elts.
2982   // Ensure we do this AFTER constant folding above.
2983   if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2984     Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2985 
2986   return false;
2987 }
2988 
2989 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2990                                                 const APInt &DemandedElts,
2991                                                 DAGCombinerInfo &DCI) const {
2992   SelectionDAG &DAG = DCI.DAG;
2993   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2994                         !DCI.isBeforeLegalizeOps());
2995 
2996   APInt KnownUndef, KnownZero;
2997   bool Simplified =
2998       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2999   if (Simplified) {
3000     DCI.AddToWorklist(Op.getNode());
3001     DCI.CommitTargetLoweringOpt(TLO);
3002   }
3003 
3004   return Simplified;
3005 }
3006 
3007 /// Given a vector binary operation and known undefined elements for each input
3008 /// operand, compute whether each element of the output is undefined.
3009 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3010                                          const APInt &UndefOp0,
3011                                          const APInt &UndefOp1) {
3012   EVT VT = BO.getValueType();
3013   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3014          "Vector binop only");
3015 
3016   EVT EltVT = VT.getVectorElementType();
3017   unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3018   assert(UndefOp0.getBitWidth() == NumElts &&
3019          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3020 
3021   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3022                                    const APInt &UndefVals) {
3023     if (UndefVals[Index])
3024       return DAG.getUNDEF(EltVT);
3025 
3026     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3027       // Try hard to make sure that the getNode() call is not creating temporary
3028       // nodes. Ignore opaque integers because they do not constant fold.
3029       SDValue Elt = BV->getOperand(Index);
3030       auto *C = dyn_cast<ConstantSDNode>(Elt);
3031       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3032         return Elt;
3033     }
3034 
3035     return SDValue();
3036   };
3037 
3038   APInt KnownUndef = APInt::getZero(NumElts);
3039   for (unsigned i = 0; i != NumElts; ++i) {
3040     // If both inputs for this element are either constant or undef and match
3041     // the element type, compute the constant/undef result for this element of
3042     // the vector.
3043     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3044     // not handle FP constants. The code within getNode() should be refactored
3045     // to avoid the danger of creating a bogus temporary node here.
3046     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3047     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3048     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3049       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3050         KnownUndef.setBit(i);
3051   }
3052   return KnownUndef;
3053 }
3054 
3055 bool TargetLowering::SimplifyDemandedVectorElts(
3056     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3057     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3058     bool AssumeSingleUse) const {
3059   EVT VT = Op.getValueType();
3060   unsigned Opcode = Op.getOpcode();
3061   APInt DemandedElts = OriginalDemandedElts;
3062   unsigned NumElts = DemandedElts.getBitWidth();
3063   assert(VT.isVector() && "Expected vector op");
3064 
3065   KnownUndef = KnownZero = APInt::getZero(NumElts);
3066 
3067   if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3068     return false;
3069 
3070   // TODO: For now we assume we know nothing about scalable vectors.
3071   if (VT.isScalableVector())
3072     return false;
3073 
3074   assert(VT.getVectorNumElements() == NumElts &&
3075          "Mask size mismatches value type element count!");
3076 
3077   // Undef operand.
3078   if (Op.isUndef()) {
3079     KnownUndef.setAllBits();
3080     return false;
3081   }
3082 
3083   // If Op has other users, assume that all elements are needed.
3084   if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3085     DemandedElts.setAllBits();
3086 
3087   // Not demanding any elements from Op.
3088   if (DemandedElts == 0) {
3089     KnownUndef.setAllBits();
3090     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3091   }
3092 
3093   // Limit search depth.
3094   if (Depth >= SelectionDAG::MaxRecursionDepth)
3095     return false;
3096 
3097   SDLoc DL(Op);
3098   unsigned EltSizeInBits = VT.getScalarSizeInBits();
3099   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3100 
3101   // Helper for demanding the specified elements and all the bits of both binary
3102   // operands.
3103   auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3104     SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3105                                                            TLO.DAG, Depth + 1);
3106     SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3107                                                            TLO.DAG, Depth + 1);
3108     if (NewOp0 || NewOp1) {
3109       SDValue NewOp =
3110           TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3111                           NewOp1 ? NewOp1 : Op1, Op->getFlags());
3112       return TLO.CombineTo(Op, NewOp);
3113     }
3114     return false;
3115   };
3116 
3117   switch (Opcode) {
3118   case ISD::SCALAR_TO_VECTOR: {
3119     if (!DemandedElts[0]) {
3120       KnownUndef.setAllBits();
3121       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3122     }
3123     SDValue ScalarSrc = Op.getOperand(0);
3124     if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3125       SDValue Src = ScalarSrc.getOperand(0);
3126       SDValue Idx = ScalarSrc.getOperand(1);
3127       EVT SrcVT = Src.getValueType();
3128 
3129       ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3130 
3131       if (SrcEltCnt.isScalable())
3132         return false;
3133 
3134       unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3135       if (isNullConstant(Idx)) {
3136         APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3137         APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3138         APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3139         if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3140                                        TLO, Depth + 1))
3141           return true;
3142       }
3143     }
3144     KnownUndef.setHighBits(NumElts - 1);
3145     break;
3146   }
3147   case ISD::BITCAST: {
3148     SDValue Src = Op.getOperand(0);
3149     EVT SrcVT = Src.getValueType();
3150 
3151     // We only handle vectors here.
3152     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3153     if (!SrcVT.isVector())
3154       break;
3155 
3156     // Fast handling of 'identity' bitcasts.
3157     unsigned NumSrcElts = SrcVT.getVectorNumElements();
3158     if (NumSrcElts == NumElts)
3159       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3160                                         KnownZero, TLO, Depth + 1);
3161 
3162     APInt SrcDemandedElts, SrcZero, SrcUndef;
3163 
3164     // Bitcast from 'large element' src vector to 'small element' vector, we
3165     // must demand a source element if any DemandedElt maps to it.
3166     if ((NumElts % NumSrcElts) == 0) {
3167       unsigned Scale = NumElts / NumSrcElts;
3168       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3169       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3170                                      TLO, Depth + 1))
3171         return true;
3172 
3173       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3174       // of the large element.
3175       // TODO - bigendian once we have test coverage.
3176       if (IsLE) {
3177         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3178         APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3179         for (unsigned i = 0; i != NumElts; ++i)
3180           if (DemandedElts[i]) {
3181             unsigned Ofs = (i % Scale) * EltSizeInBits;
3182             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3183           }
3184 
3185         KnownBits Known;
3186         if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3187                                  TLO, Depth + 1))
3188           return true;
3189 
3190         // The bitcast has split each wide element into a number of
3191         // narrow subelements. We have just computed the Known bits
3192         // for wide elements. See if element splitting results in
3193         // some subelements being zero. Only for demanded elements!
3194         for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3195           if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3196                    .isAllOnes())
3197             continue;
3198           for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3199             unsigned Elt = Scale * SrcElt + SubElt;
3200             if (DemandedElts[Elt])
3201               KnownZero.setBit(Elt);
3202           }
3203         }
3204       }
3205 
3206       // If the src element is zero/undef then all the output elements will be -
3207       // only demanded elements are guaranteed to be correct.
3208       for (unsigned i = 0; i != NumSrcElts; ++i) {
3209         if (SrcDemandedElts[i]) {
3210           if (SrcZero[i])
3211             KnownZero.setBits(i * Scale, (i + 1) * Scale);
3212           if (SrcUndef[i])
3213             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3214         }
3215       }
3216     }
3217 
3218     // Bitcast from 'small element' src vector to 'large element' vector, we
3219     // demand all smaller source elements covered by the larger demanded element
3220     // of this vector.
3221     if ((NumSrcElts % NumElts) == 0) {
3222       unsigned Scale = NumSrcElts / NumElts;
3223       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3224       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3225                                      TLO, Depth + 1))
3226         return true;
3227 
3228       // If all the src elements covering an output element are zero/undef, then
3229       // the output element will be as well, assuming it was demanded.
3230       for (unsigned i = 0; i != NumElts; ++i) {
3231         if (DemandedElts[i]) {
3232           if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3233             KnownZero.setBit(i);
3234           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3235             KnownUndef.setBit(i);
3236         }
3237       }
3238     }
3239     break;
3240   }
3241   case ISD::FREEZE: {
3242     SDValue N0 = Op.getOperand(0);
3243     if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3244                                                  /*PoisonOnly=*/false))
3245       return TLO.CombineTo(Op, N0);
3246 
3247     // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3248     // freeze(op(x, ...)) -> op(freeze(x), ...).
3249     if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3250       return TLO.CombineTo(
3251           Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3252                               TLO.DAG.getFreeze(N0.getOperand(0))));
3253     break;
3254   }
3255   case ISD::BUILD_VECTOR: {
3256     // Check all elements and simplify any unused elements with UNDEF.
3257     if (!DemandedElts.isAllOnes()) {
3258       // Don't simplify BROADCASTS.
3259       if (llvm::any_of(Op->op_values(),
3260                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3261         SmallVector<SDValue, 32> Ops(Op->ops());
3262         bool Updated = false;
3263         for (unsigned i = 0; i != NumElts; ++i) {
3264           if (!DemandedElts[i] && !Ops[i].isUndef()) {
3265             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3266             KnownUndef.setBit(i);
3267             Updated = true;
3268           }
3269         }
3270         if (Updated)
3271           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3272       }
3273     }
3274     for (unsigned i = 0; i != NumElts; ++i) {
3275       SDValue SrcOp = Op.getOperand(i);
3276       if (SrcOp.isUndef()) {
3277         KnownUndef.setBit(i);
3278       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3279                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3280         KnownZero.setBit(i);
3281       }
3282     }
3283     break;
3284   }
3285   case ISD::CONCAT_VECTORS: {
3286     EVT SubVT = Op.getOperand(0).getValueType();
3287     unsigned NumSubVecs = Op.getNumOperands();
3288     unsigned NumSubElts = SubVT.getVectorNumElements();
3289     for (unsigned i = 0; i != NumSubVecs; ++i) {
3290       SDValue SubOp = Op.getOperand(i);
3291       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3292       APInt SubUndef, SubZero;
3293       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3294                                      Depth + 1))
3295         return true;
3296       KnownUndef.insertBits(SubUndef, i * NumSubElts);
3297       KnownZero.insertBits(SubZero, i * NumSubElts);
3298     }
3299 
3300     // Attempt to avoid multi-use ops if we don't need anything from them.
3301     if (!DemandedElts.isAllOnes()) {
3302       bool FoundNewSub = false;
3303       SmallVector<SDValue, 2> DemandedSubOps;
3304       for (unsigned i = 0; i != NumSubVecs; ++i) {
3305         SDValue SubOp = Op.getOperand(i);
3306         APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3307         SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3308             SubOp, SubElts, TLO.DAG, Depth + 1);
3309         DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3310         FoundNewSub = NewSubOp ? true : FoundNewSub;
3311       }
3312       if (FoundNewSub) {
3313         SDValue NewOp =
3314             TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3315         return TLO.CombineTo(Op, NewOp);
3316       }
3317     }
3318     break;
3319   }
3320   case ISD::INSERT_SUBVECTOR: {
3321     // Demand any elements from the subvector and the remainder from the src its
3322     // inserted into.
3323     SDValue Src = Op.getOperand(0);
3324     SDValue Sub = Op.getOperand(1);
3325     uint64_t Idx = Op.getConstantOperandVal(2);
3326     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3327     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3328     APInt DemandedSrcElts = DemandedElts;
3329     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3330 
3331     APInt SubUndef, SubZero;
3332     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3333                                    Depth + 1))
3334       return true;
3335 
3336     // If none of the src operand elements are demanded, replace it with undef.
3337     if (!DemandedSrcElts && !Src.isUndef())
3338       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3339                                                TLO.DAG.getUNDEF(VT), Sub,
3340                                                Op.getOperand(2)));
3341 
3342     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3343                                    TLO, Depth + 1))
3344       return true;
3345     KnownUndef.insertBits(SubUndef, Idx);
3346     KnownZero.insertBits(SubZero, Idx);
3347 
3348     // Attempt to avoid multi-use ops if we don't need anything from them.
3349     if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3350       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3351           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3352       SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3353           Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3354       if (NewSrc || NewSub) {
3355         NewSrc = NewSrc ? NewSrc : Src;
3356         NewSub = NewSub ? NewSub : Sub;
3357         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3358                                         NewSub, Op.getOperand(2));
3359         return TLO.CombineTo(Op, NewOp);
3360       }
3361     }
3362     break;
3363   }
3364   case ISD::EXTRACT_SUBVECTOR: {
3365     // Offset the demanded elts by the subvector index.
3366     SDValue Src = Op.getOperand(0);
3367     if (Src.getValueType().isScalableVector())
3368       break;
3369     uint64_t Idx = Op.getConstantOperandVal(1);
3370     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3371     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3372 
3373     APInt SrcUndef, SrcZero;
3374     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3375                                    Depth + 1))
3376       return true;
3377     KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3378     KnownZero = SrcZero.extractBits(NumElts, Idx);
3379 
3380     // Attempt to avoid multi-use ops if we don't need anything from them.
3381     if (!DemandedElts.isAllOnes()) {
3382       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3383           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3384       if (NewSrc) {
3385         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3386                                         Op.getOperand(1));
3387         return TLO.CombineTo(Op, NewOp);
3388       }
3389     }
3390     break;
3391   }
3392   case ISD::INSERT_VECTOR_ELT: {
3393     SDValue Vec = Op.getOperand(0);
3394     SDValue Scl = Op.getOperand(1);
3395     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3396 
3397     // For a legal, constant insertion index, if we don't need this insertion
3398     // then strip it, else remove it from the demanded elts.
3399     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3400       unsigned Idx = CIdx->getZExtValue();
3401       if (!DemandedElts[Idx])
3402         return TLO.CombineTo(Op, Vec);
3403 
3404       APInt DemandedVecElts(DemandedElts);
3405       DemandedVecElts.clearBit(Idx);
3406       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3407                                      KnownZero, TLO, Depth + 1))
3408         return true;
3409 
3410       KnownUndef.setBitVal(Idx, Scl.isUndef());
3411 
3412       KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3413       break;
3414     }
3415 
3416     APInt VecUndef, VecZero;
3417     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3418                                    Depth + 1))
3419       return true;
3420     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3421     break;
3422   }
3423   case ISD::VSELECT: {
3424     SDValue Sel = Op.getOperand(0);
3425     SDValue LHS = Op.getOperand(1);
3426     SDValue RHS = Op.getOperand(2);
3427 
3428     // Try to transform the select condition based on the current demanded
3429     // elements.
3430     APInt UndefSel, ZeroSel;
3431     if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3432                                    Depth + 1))
3433       return true;
3434 
3435     // See if we can simplify either vselect operand.
3436     APInt DemandedLHS(DemandedElts);
3437     APInt DemandedRHS(DemandedElts);
3438     APInt UndefLHS, ZeroLHS;
3439     APInt UndefRHS, ZeroRHS;
3440     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3441                                    Depth + 1))
3442       return true;
3443     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3444                                    Depth + 1))
3445       return true;
3446 
3447     KnownUndef = UndefLHS & UndefRHS;
3448     KnownZero = ZeroLHS & ZeroRHS;
3449 
3450     // If we know that the selected element is always zero, we don't need the
3451     // select value element.
3452     APInt DemandedSel = DemandedElts & ~KnownZero;
3453     if (DemandedSel != DemandedElts)
3454       if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3455                                      Depth + 1))
3456         return true;
3457 
3458     break;
3459   }
3460   case ISD::VECTOR_SHUFFLE: {
3461     SDValue LHS = Op.getOperand(0);
3462     SDValue RHS = Op.getOperand(1);
3463     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3464 
3465     // Collect demanded elements from shuffle operands..
3466     APInt DemandedLHS(NumElts, 0);
3467     APInt DemandedRHS(NumElts, 0);
3468     for (unsigned i = 0; i != NumElts; ++i) {
3469       int M = ShuffleMask[i];
3470       if (M < 0 || !DemandedElts[i])
3471         continue;
3472       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3473       if (M < (int)NumElts)
3474         DemandedLHS.setBit(M);
3475       else
3476         DemandedRHS.setBit(M - NumElts);
3477     }
3478 
3479     // See if we can simplify either shuffle operand.
3480     APInt UndefLHS, ZeroLHS;
3481     APInt UndefRHS, ZeroRHS;
3482     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3483                                    Depth + 1))
3484       return true;
3485     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3486                                    Depth + 1))
3487       return true;
3488 
3489     // Simplify mask using undef elements from LHS/RHS.
3490     bool Updated = false;
3491     bool IdentityLHS = true, IdentityRHS = true;
3492     SmallVector<int, 32> NewMask(ShuffleMask);
3493     for (unsigned i = 0; i != NumElts; ++i) {
3494       int &M = NewMask[i];
3495       if (M < 0)
3496         continue;
3497       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3498           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3499         Updated = true;
3500         M = -1;
3501       }
3502       IdentityLHS &= (M < 0) || (M == (int)i);
3503       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3504     }
3505 
3506     // Update legal shuffle masks based on demanded elements if it won't reduce
3507     // to Identity which can cause premature removal of the shuffle mask.
3508     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3509       SDValue LegalShuffle =
3510           buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3511       if (LegalShuffle)
3512         return TLO.CombineTo(Op, LegalShuffle);
3513     }
3514 
3515     // Propagate undef/zero elements from LHS/RHS.
3516     for (unsigned i = 0; i != NumElts; ++i) {
3517       int M = ShuffleMask[i];
3518       if (M < 0) {
3519         KnownUndef.setBit(i);
3520       } else if (M < (int)NumElts) {
3521         if (UndefLHS[M])
3522           KnownUndef.setBit(i);
3523         if (ZeroLHS[M])
3524           KnownZero.setBit(i);
3525       } else {
3526         if (UndefRHS[M - NumElts])
3527           KnownUndef.setBit(i);
3528         if (ZeroRHS[M - NumElts])
3529           KnownZero.setBit(i);
3530       }
3531     }
3532     break;
3533   }
3534   case ISD::ANY_EXTEND_VECTOR_INREG:
3535   case ISD::SIGN_EXTEND_VECTOR_INREG:
3536   case ISD::ZERO_EXTEND_VECTOR_INREG: {
3537     APInt SrcUndef, SrcZero;
3538     SDValue Src = Op.getOperand(0);
3539     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3540     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3541     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3542                                    Depth + 1))
3543       return true;
3544     KnownZero = SrcZero.zextOrTrunc(NumElts);
3545     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3546 
3547     if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3548         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3549         DemandedSrcElts == 1) {
3550       // aext - if we just need the bottom element then we can bitcast.
3551       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3552     }
3553 
3554     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3555       // zext(undef) upper bits are guaranteed to be zero.
3556       if (DemandedElts.isSubsetOf(KnownUndef))
3557         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3558       KnownUndef.clearAllBits();
3559 
3560       // zext - if we just need the bottom element then we can mask:
3561       // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3562       if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3563           Op->isOnlyUserOf(Src.getNode()) &&
3564           Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3565         SDLoc DL(Op);
3566         EVT SrcVT = Src.getValueType();
3567         EVT SrcSVT = SrcVT.getScalarType();
3568         SmallVector<SDValue> MaskElts;
3569         MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3570         MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3571         SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3572         if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3573                 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3574           Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3575           return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3576         }
3577       }
3578     }
3579     break;
3580   }
3581 
3582   // TODO: There are more binop opcodes that could be handled here - MIN,
3583   // MAX, saturated math, etc.
3584   case ISD::ADD: {
3585     SDValue Op0 = Op.getOperand(0);
3586     SDValue Op1 = Op.getOperand(1);
3587     if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3588       APInt UndefLHS, ZeroLHS;
3589       if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3590                                      Depth + 1, /*AssumeSingleUse*/ true))
3591         return true;
3592     }
3593     [[fallthrough]];
3594   }
3595   case ISD::AVGCEILS:
3596   case ISD::AVGCEILU:
3597   case ISD::AVGFLOORS:
3598   case ISD::AVGFLOORU:
3599   case ISD::OR:
3600   case ISD::XOR:
3601   case ISD::SUB:
3602   case ISD::FADD:
3603   case ISD::FSUB:
3604   case ISD::FMUL:
3605   case ISD::FDIV:
3606   case ISD::FREM: {
3607     SDValue Op0 = Op.getOperand(0);
3608     SDValue Op1 = Op.getOperand(1);
3609 
3610     APInt UndefRHS, ZeroRHS;
3611     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3612                                    Depth + 1))
3613       return true;
3614     APInt UndefLHS, ZeroLHS;
3615     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3616                                    Depth + 1))
3617       return true;
3618 
3619     KnownZero = ZeroLHS & ZeroRHS;
3620     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3621 
3622     // Attempt to avoid multi-use ops if we don't need anything from them.
3623     // TODO - use KnownUndef to relax the demandedelts?
3624     if (!DemandedElts.isAllOnes())
3625       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3626         return true;
3627     break;
3628   }
3629   case ISD::SHL:
3630   case ISD::SRL:
3631   case ISD::SRA:
3632   case ISD::ROTL:
3633   case ISD::ROTR: {
3634     SDValue Op0 = Op.getOperand(0);
3635     SDValue Op1 = Op.getOperand(1);
3636 
3637     APInt UndefRHS, ZeroRHS;
3638     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3639                                    Depth + 1))
3640       return true;
3641     APInt UndefLHS, ZeroLHS;
3642     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3643                                    Depth + 1))
3644       return true;
3645 
3646     KnownZero = ZeroLHS;
3647     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3648 
3649     // Attempt to avoid multi-use ops if we don't need anything from them.
3650     // TODO - use KnownUndef to relax the demandedelts?
3651     if (!DemandedElts.isAllOnes())
3652       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3653         return true;
3654     break;
3655   }
3656   case ISD::MUL:
3657   case ISD::MULHU:
3658   case ISD::MULHS:
3659   case ISD::AND: {
3660     SDValue Op0 = Op.getOperand(0);
3661     SDValue Op1 = Op.getOperand(1);
3662 
3663     APInt SrcUndef, SrcZero;
3664     if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3665                                    Depth + 1))
3666       return true;
3667     // If we know that a demanded element was zero in Op1 we don't need to
3668     // demand it in Op0 - its guaranteed to be zero.
3669     APInt DemandedElts0 = DemandedElts & ~SrcZero;
3670     if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3671                                    TLO, Depth + 1))
3672       return true;
3673 
3674     KnownUndef &= DemandedElts0;
3675     KnownZero &= DemandedElts0;
3676 
3677     // If every element pair has a zero/undef then just fold to zero.
3678     // fold (and x, undef) -> 0  /  (and x, 0) -> 0
3679     // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
3680     if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3681       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3682 
3683     // If either side has a zero element, then the result element is zero, even
3684     // if the other is an UNDEF.
3685     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3686     // and then handle 'and' nodes with the rest of the binop opcodes.
3687     KnownZero |= SrcZero;
3688     KnownUndef &= SrcUndef;
3689     KnownUndef &= ~KnownZero;
3690 
3691     // Attempt to avoid multi-use ops if we don't need anything from them.
3692     if (!DemandedElts.isAllOnes())
3693       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3694         return true;
3695     break;
3696   }
3697   case ISD::TRUNCATE:
3698   case ISD::SIGN_EXTEND:
3699   case ISD::ZERO_EXTEND:
3700     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3701                                    KnownZero, TLO, Depth + 1))
3702       return true;
3703 
3704     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3705       // zext(undef) upper bits are guaranteed to be zero.
3706       if (DemandedElts.isSubsetOf(KnownUndef))
3707         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3708       KnownUndef.clearAllBits();
3709     }
3710     break;
3711   default: {
3712     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3713       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3714                                                   KnownZero, TLO, Depth))
3715         return true;
3716     } else {
3717       KnownBits Known;
3718       APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3719       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3720                                TLO, Depth, AssumeSingleUse))
3721         return true;
3722     }
3723     break;
3724   }
3725   }
3726   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3727 
3728   // Constant fold all undef cases.
3729   // TODO: Handle zero cases as well.
3730   if (DemandedElts.isSubsetOf(KnownUndef))
3731     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3732 
3733   return false;
3734 }
3735 
3736 /// Determine which of the bits specified in Mask are known to be either zero or
3737 /// one and return them in the Known.
3738 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3739                                                    KnownBits &Known,
3740                                                    const APInt &DemandedElts,
3741                                                    const SelectionDAG &DAG,
3742                                                    unsigned Depth) const {
3743   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3744           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3745           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3746           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3747          "Should use MaskedValueIsZero if you don't know whether Op"
3748          " is a target node!");
3749   Known.resetAll();
3750 }
3751 
// GlobalISel (MachineInstr-level) counterpart of computeKnownBitsForTargetNode.
// Targets override this to report known bits for the value defined in
// register R by a target-specific generic instruction.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default: no bits are known.
  Known.resetAll();
}
3758 
3759 void TargetLowering::computeKnownBitsForFrameIndex(
3760   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3761   // The low bits are known zero if the pointer is aligned.
3762   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3763 }
3764 
// GlobalISel hook: compute a known alignment for the value in register R.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  // Conservative default: no alignment beyond a single byte is known.
  return Align(1);
}
3770 
3771 /// This method can be implemented by targets that want to expose additional
3772 /// information about sign bits to the DAG Combiner.
3773 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3774                                                          const APInt &,
3775                                                          const SelectionDAG &,
3776                                                          unsigned Depth) const {
3777   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3778           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3779           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3780           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3781          "Should use ComputeNumSignBits if you don't know whether Op"
3782          " is a target node!");
3783   return 1;
3784 }
3785 
// GlobalISel counterpart of ComputeNumSignBitsForTargetNode.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  // Conservative default: only the sign bit itself is known to be a sign bit.
  return 1;
}
3791 
// Target hook called by SimplifyDemandedVectorElts for target-specific nodes.
// Targets override this to simplify their own opcodes. Returning false means
// no simplification was performed (and KnownUndef/KnownZero carry no info).
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
         " is a target node!");
  return false;
}
3803 
3804 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3805     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3806     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3807   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3808           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3809           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3810           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3811          "Should use SimplifyDemandedBits if you don't know whether Op"
3812          " is a target node!");
3813   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3814   return false;
3815 }
3816 
// Target hook called by SimplifyMultipleUseDemandedBits for target-specific
// nodes. Returning a null SDValue means the target found no existing node
// that yields the demanded bits without creating new nodes.
SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
      " is a target node!");
  return SDValue();
}
3829 
3830 SDValue
3831 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3832                                         SDValue N1, MutableArrayRef<int> Mask,
3833                                         SelectionDAG &DAG) const {
3834   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3835   if (!LegalMask) {
3836     std::swap(N0, N1);
3837     ShuffleVectorSDNode::commuteMask(Mask);
3838     LegalMask = isShuffleMaskLegal(Mask, VT);
3839   }
3840 
3841   if (!LegalMask)
3842     return SDValue();
3843 
3844   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3845 }
3846 
// Hook for targets that materialize constants via loads (e.g. from a constant
// pool): return the IR Constant the given load produces, if known.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  // Default: the constant (if any) behind this load is unknown.
  return nullptr;
}
3850 
3851 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3852     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3853     bool PoisonOnly, unsigned Depth) const {
3854   assert(
3855       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3856        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3857        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3858        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3859       "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3860       " is a target node!");
3861 
3862   // If Op can't create undef/poison and none of its operands are undef/poison
3863   // then Op is never undef/poison.
3864   return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3865                                               /*ConsiderFlags*/ true, Depth) &&
3866          all_of(Op->ops(), [&](SDValue V) {
3867            return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3868                                                        Depth + 1);
3869          });
3870 }
3871 
// Can the given target node introduce undef/poison even when its operands are
// well-defined? Targets override this to whitelist safe opcodes.
bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use canCreateUndefOrPoison if you don't know whether Op"
         " is a target node!");
  // Be conservative and return true.
  return true;
}
3884 
// Is the given target node guaranteed to never produce a NaN (or, if SNaN is
// set, never a signaling NaN)?
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
                                                  unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isKnownNeverNaN if you don't know whether Op"
         " is a target node!");
  // Conservative default: the result may be NaN.
  return false;
}
3897 
// Is the given target node a splat of a single scalar across the demanded
// elements? On success targets also report which elements are undef.
bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &UndefElts,
                                               const SelectionDAG &DAG,
                                               unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isSplatValue if you don't know whether Op"
         " is a target node!");
  // Conservative default: not known to be a splat.
  return false;
}
3911 
3912 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3913 // work with truncating build vectors and vectors with elements of less than
3914 // 8 bits.
3915 bool TargetLowering::isConstTrueVal(SDValue N) const {
3916   if (!N)
3917     return false;
3918 
3919   unsigned EltWidth;
3920   APInt CVal;
3921   if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3922                                                /*AllowTruncation=*/true)) {
3923     CVal = CN->getAPIntValue();
3924     EltWidth = N.getValueType().getScalarSizeInBits();
3925   } else
3926     return false;
3927 
3928   // If this is a truncating splat, truncate the splat value.
3929   // Otherwise, we may fail to match the expected values below.
3930   if (EltWidth < CVal.getBitWidth())
3931     CVal = CVal.trunc(EltWidth);
3932 
3933   switch (getBooleanContents(N.getValueType())) {
3934   case UndefinedBooleanContent:
3935     return CVal[0];
3936   case ZeroOrOneBooleanContent:
3937     return CVal.isOne();
3938   case ZeroOrNegativeOneBooleanContent:
3939     return CVal.isAllOnes();
3940   }
3941 
3942   llvm_unreachable("Invalid boolean contents");
3943 }
3944 
3945 bool TargetLowering::isConstFalseVal(SDValue N) const {
3946   if (!N)
3947     return false;
3948 
3949   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3950   if (!CN) {
3951     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3952     if (!BV)
3953       return false;
3954 
3955     // Only interested in constant splats, we don't care about undef
3956     // elements in identifying boolean constants and getConstantSplatNode
3957     // returns NULL if all ops are undef;
3958     CN = BV->getConstantSplatNode();
3959     if (!CN)
3960       return false;
3961   }
3962 
3963   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3964     return !CN->getAPIntValue()[0];
3965 
3966   return CN->isZero();
3967 }
3968 
3969 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3970                                        bool SExt) const {
3971   if (VT == MVT::i1)
3972     return N->isOne();
3973 
3974   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3975   switch (Cnt) {
3976   case TargetLowering::ZeroOrOneBooleanContent:
3977     // An extended value of 1 is always true, unless its original type is i1,
3978     // in which case it will be sign extended to -1.
3979     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3980   case TargetLowering::UndefinedBooleanContent:
3981   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3982     return N->isAllOnes() && SExt;
3983   }
3984   llvm_unreachable("Unexpected enumeration.");
3985 }
3986 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
/// Handled patterns (eq/ne only): (X & Y) cc 0 narrowing/boolean folds and
/// (X & Y) cc Y rewrites. Returns a null SDValue if no fold applies.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize the AND (if any) onto the LHS of the comparison.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only integer equality/inequality comparisons against an AND are handled.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    // The narrow type is just wide enough for the mask bit to be its sign bit.
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // its liable to create and infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
4081 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned comparison onto an eq/ne check, canonicalizing the
  // non-inclusive forms (ule/ugt) by bumping the constant by one.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The two constants must be exactly one power-of-two apart, i.e.
  // (1 << KeptBits) and (1 << (KeptBits-1)), matching the pattern above.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into:  sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      ISD::SIGN_EXTEND_INREG, DL, XVT, X,
      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
}
4173 
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
// Hoists the constant out of the shift by applying the opposite logical shift
// to the other AND operand instead, when the target says that is profitable.
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  // Out-parameters filled in by the Match lambda below.
  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'.
  // On success, sets C/Y/NewShiftOpcode; also reads X (set before the call)
  // when asking the target whether the transform is profitable.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
4242 
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
/// Returns the simplified setcc, or a null SDValue if no fold applies.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  // All remaining folds require the comparison RHS to match operand Y.
  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  // Queue the new shift for further combining (unless we're inside the
  // legalizer, which manages its own worklist).
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
4283 
/// Fold a setcc comparing (ctpop X) against the constant C1 into a cheaper
/// bit-manipulation sequence when the target's ctpop is not fast.
/// Returns a replacement setcc, or a null SDValue if no fold applies.
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  // The truncate is value-preserving because the narrow type must still be
  // wide enough to hold the maximum possible popcount of the wide source
  // (i.e. more than log2 of the source bit width).
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
      return SDValue();

    // Each "pass" below clears the lowest set bit (x &= x - 1), so larger
    // constants need more passes; bail out once the target says the chain
    // would be too expensive.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    // (ctpop x) u< k needs k-1 clears followed by an == 0 check;
    // (ctpop x) u> k needs k clears followed by a != 0 check.
    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      // x + (-1) == x - 1; AND with the original clears the lowest set bit.
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);

    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
    // check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    // x ^ (x-1) is a mask covering the lowest set bit and everything below;
    // it exceeds x-1 (unsigned) exactly when x has a single bit set. The
    // comparison also handles x == 0 correctly (all-ones u> all-ones is
    // false), which is why no extra zero check is needed on this path.
    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
  }

  return SDValue();
}
4357 
4358 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4359                                    ISD::CondCode Cond, const SDLoc &dl,
4360                                    SelectionDAG &DAG) {
4361   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4362     return SDValue();
4363 
4364   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4365   if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4366     return SDValue();
4367 
4368   auto getRotateSource = [](SDValue X) {
4369     if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4370       return X.getOperand(0);
4371     return SDValue();
4372   };
4373 
4374   // Peek through a rotated value compared against 0 or -1:
4375   // (rot X, Y) == 0/-1 --> X == 0/-1
4376   // (rot X, Y) != 0/-1 --> X != 0/-1
4377   if (SDValue R = getRotateSource(N0))
4378     return DAG.getSetCC(dl, VT, R, N1, Cond);
4379 
4380   // Peek through an 'or' of a rotated value compared against 0:
4381   // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4382   // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4383   //
4384   // TODO: Add the 'and' with -1 sibling.
4385   // TODO: Recurse through a series of 'or' ops to find the rotate.
4386   EVT OpVT = N0.getValueType();
4387   if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4388     if (SDValue R = getRotateSource(N0.getOperand(0))) {
4389       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4390       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4391     }
4392     if (SDValue R = getRotateSource(N0.getOperand(1))) {
4393       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4394       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4395     }
4396   }
4397 
4398   return SDValue();
4399 }
4400 
4401 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4402                                         ISD::CondCode Cond, const SDLoc &dl,
4403                                         SelectionDAG &DAG) {
4404   // If we are testing for all-bits-clear, we might be able to do that with
4405   // less shifting since bit-order does not matter.
4406   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4407     return SDValue();
4408 
4409   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4410   if (!C1 || !C1->isZero())
4411     return SDValue();
4412 
4413   if (!N0.hasOneUse() ||
4414       (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4415     return SDValue();
4416 
4417   unsigned BitWidth = N0.getScalarValueSizeInBits();
4418   auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4419   if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4420     return SDValue();
4421 
4422   // Canonicalize fshr as fshl to reduce pattern-matching.
4423   unsigned ShAmt = ShAmtC->getZExtValue();
4424   if (N0.getOpcode() == ISD::FSHR)
4425     ShAmt = BitWidth - ShAmt;
4426 
4427   // Match an 'or' with a specific operand 'Other' in either commuted variant.
4428   SDValue X, Y;
4429   auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4430     if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4431       return false;
4432     if (Or.getOperand(0) == Other) {
4433       X = Or.getOperand(0);
4434       Y = Or.getOperand(1);
4435       return true;
4436     }
4437     if (Or.getOperand(1) == Other) {
4438       X = Or.getOperand(1);
4439       Y = Or.getOperand(0);
4440       return true;
4441     }
4442     return false;
4443   };
4444 
4445   EVT OpVT = N0.getValueType();
4446   EVT ShAmtVT = N0.getOperand(2).getValueType();
4447   SDValue F0 = N0.getOperand(0);
4448   SDValue F1 = N0.getOperand(1);
4449   if (matchOr(F0, F1)) {
4450     // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4451     SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4452     SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4453     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4454     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4455   }
4456   if (matchOr(F1, F0)) {
4457     // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4458     SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4459     SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4460     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4461     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4462   }
4463 
4464   return SDValue();
4465 }
4466 
4467 /// Try to simplify a setcc built with the specified operands and cc. If it is
4468 /// unable to simplify it, return a null SDValue.
4469 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4470                                       ISD::CondCode Cond, bool foldBooleans,
4471                                       DAGCombinerInfo &DCI,
4472                                       const SDLoc &dl) const {
4473   SelectionDAG &DAG = DCI.DAG;
4474   const DataLayout &Layout = DAG.getDataLayout();
4475   EVT OpVT = N0.getValueType();
4476   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4477 
4478   // Constant fold or commute setcc.
4479   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4480     return Fold;
4481 
4482   bool N0ConstOrSplat =
4483       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4484   bool N1ConstOrSplat =
4485       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4486 
4487   // Canonicalize toward having the constant on the RHS.
4488   // TODO: Handle non-splat vector constants. All undef causes trouble.
4489   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4490   // infinite loop here when we encounter one.
4491   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4492   if (N0ConstOrSplat && !N1ConstOrSplat &&
4493       (DCI.isBeforeLegalizeOps() ||
4494        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4495     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4496 
4497   // If we have a subtract with the same 2 non-constant operands as this setcc
4498   // -- but in reverse order -- then try to commute the operands of this setcc
4499   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4500   // instruction on some targets.
4501   if (!N0ConstOrSplat && !N1ConstOrSplat &&
4502       (DCI.isBeforeLegalizeOps() ||
4503        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4504       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4505       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4506     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4507 
4508   if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4509     return V;
4510 
4511   if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4512     return V;
4513 
4514   if (auto *N1C = isConstOrConstSplat(N1)) {
4515     const APInt &C1 = N1C->getAPIntValue();
4516 
4517     // Optimize some CTPOP cases.
4518     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4519       return V;
4520 
4521     // For equality to 0 of a no-wrap multiply, decompose and test each op:
4522     // X * Y == 0 --> (X == 0) || (Y == 0)
4523     // X * Y != 0 --> (X != 0) && (Y != 0)
4524     // TODO: This bails out if minsize is set, but if the target doesn't have a
4525     //       single instruction multiply for this type, it would likely be
4526     //       smaller to decompose.
4527     if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4528         N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4529         (N0->getFlags().hasNoUnsignedWrap() ||
4530          N0->getFlags().hasNoSignedWrap()) &&
4531         !Attr.hasFnAttr(Attribute::MinSize)) {
4532       SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4533       SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4534       unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4535       return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4536     }
4537 
4538     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4539     // equality comparison, then we're just comparing whether X itself is
4540     // zero.
4541     if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4542         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4543         llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4544       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4545         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4546             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4547           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4548             // (srl (ctlz x), 5) == 0  -> X != 0
4549             // (srl (ctlz x), 5) != 1  -> X != 0
4550             Cond = ISD::SETNE;
4551           } else {
4552             // (srl (ctlz x), 5) != 0  -> X == 0
4553             // (srl (ctlz x), 5) == 1  -> X == 0
4554             Cond = ISD::SETEQ;
4555           }
4556           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4557           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4558                               Cond);
4559         }
4560       }
4561     }
4562   }
4563 
4564   // FIXME: Support vectors.
4565   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4566     const APInt &C1 = N1C->getAPIntValue();
4567 
4568     // (zext x) == C --> x == (trunc C)
4569     // (sext x) == C --> x == (trunc C)
4570     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4571         DCI.isBeforeLegalize() && N0->hasOneUse()) {
4572       unsigned MinBits = N0.getValueSizeInBits();
4573       SDValue PreExt;
4574       bool Signed = false;
4575       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4576         // ZExt
4577         MinBits = N0->getOperand(0).getValueSizeInBits();
4578         PreExt = N0->getOperand(0);
4579       } else if (N0->getOpcode() == ISD::AND) {
4580         // DAGCombine turns costly ZExts into ANDs
4581         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4582           if ((C->getAPIntValue()+1).isPowerOf2()) {
4583             MinBits = C->getAPIntValue().countr_one();
4584             PreExt = N0->getOperand(0);
4585           }
4586       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4587         // SExt
4588         MinBits = N0->getOperand(0).getValueSizeInBits();
4589         PreExt = N0->getOperand(0);
4590         Signed = true;
4591       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4592         // ZEXTLOAD / SEXTLOAD
4593         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4594           MinBits = LN0->getMemoryVT().getSizeInBits();
4595           PreExt = N0;
4596         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4597           Signed = true;
4598           MinBits = LN0->getMemoryVT().getSizeInBits();
4599           PreExt = N0;
4600         }
4601       }
4602 
4603       // Figure out how many bits we need to preserve this constant.
4604       unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4605 
4606       // Make sure we're not losing bits from the constant.
4607       if (MinBits > 0 &&
4608           MinBits < C1.getBitWidth() &&
4609           MinBits >= ReqdBits) {
4610         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4611         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4612           // Will get folded away.
4613           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4614           if (MinBits == 1 && C1 == 1)
4615             // Invert the condition.
4616             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4617                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4618           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4619           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4620         }
4621 
4622         // If truncating the setcc operands is not desirable, we can still
4623         // simplify the expression in some cases:
4624         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4625         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4626         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4627         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4628         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4629         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4630         SDValue TopSetCC = N0->getOperand(0);
4631         unsigned N0Opc = N0->getOpcode();
4632         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4633         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4634             TopSetCC.getOpcode() == ISD::SETCC &&
4635             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4636             (isConstFalseVal(N1) ||
4637              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4638 
4639           bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4640                          (!N1C->isZero() && Cond == ISD::SETNE);
4641 
4642           if (!Inverse)
4643             return TopSetCC;
4644 
4645           ISD::CondCode InvCond = ISD::getSetCCInverse(
4646               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4647               TopSetCC.getOperand(0).getValueType());
4648           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4649                                       TopSetCC.getOperand(1),
4650                                       InvCond);
4651         }
4652       }
4653     }
4654 
4655     // If the LHS is '(and load, const)', the RHS is 0, the test is for
4656     // equality or unsigned, and all 1 bits of the const are in the same
4657     // partial word, see if we can shorten the load.
4658     if (DCI.isBeforeLegalize() &&
4659         !ISD::isSignedIntSetCC(Cond) &&
4660         N0.getOpcode() == ISD::AND && C1 == 0 &&
4661         N0.getNode()->hasOneUse() &&
4662         isa<LoadSDNode>(N0.getOperand(0)) &&
4663         N0.getOperand(0).getNode()->hasOneUse() &&
4664         isa<ConstantSDNode>(N0.getOperand(1))) {
4665       auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4666       APInt bestMask;
4667       unsigned bestWidth = 0, bestOffset = 0;
4668       if (Lod->isSimple() && Lod->isUnindexed() &&
4669           (Lod->getMemoryVT().isByteSized() ||
4670            isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4671         unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4672         unsigned origWidth = N0.getValueSizeInBits();
4673         unsigned maskWidth = origWidth;
4674         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4675         // 8 bits, but have to be careful...
4676         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4677           origWidth = Lod->getMemoryVT().getSizeInBits();
4678         const APInt &Mask = N0.getConstantOperandAPInt(1);
4679         // Only consider power-of-2 widths (and at least one byte) as candiates
4680         // for the narrowed load.
4681         for (unsigned width = 8; width < origWidth; width *= 2) {
4682           EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4683           if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4684             continue;
4685           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4686           // Avoid accessing any padding here for now (we could use memWidth
4687           // instead of origWidth here otherwise).
4688           unsigned maxOffset = origWidth - width;
4689           for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4690             if (Mask.isSubsetOf(newMask)) {
4691               unsigned ptrOffset =
4692                   Layout.isLittleEndian() ? offset : memWidth - width - offset;
4693               unsigned IsFast = 0;
4694               Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4695               if (allowsMemoryAccess(
4696                       *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4697                       NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4698                   IsFast) {
4699                 bestOffset = ptrOffset / 8;
4700                 bestMask = Mask.lshr(offset);
4701                 bestWidth = width;
4702                 break;
4703               }
4704             }
4705             newMask <<= 8;
4706           }
4707           if (bestWidth)
4708             break;
4709         }
4710       }
4711       if (bestWidth) {
4712         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4713         SDValue Ptr = Lod->getBasePtr();
4714         if (bestOffset != 0)
4715           Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4716         SDValue NewLoad =
4717             DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4718                         Lod->getPointerInfo().getWithOffset(bestOffset),
4719                         Lod->getOriginalAlign());
4720         SDValue And =
4721             DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4722                         DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4723         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4724       }
4725     }
4726 
4727     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4728     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4729       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4730 
4731       // If the comparison constant has bits in the upper part, the
4732       // zero-extended value could never match.
4733       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4734                                               C1.getBitWidth() - InSize))) {
4735         switch (Cond) {
4736         case ISD::SETUGT:
4737         case ISD::SETUGE:
4738         case ISD::SETEQ:
4739           return DAG.getConstant(0, dl, VT);
4740         case ISD::SETULT:
4741         case ISD::SETULE:
4742         case ISD::SETNE:
4743           return DAG.getConstant(1, dl, VT);
4744         case ISD::SETGT:
4745         case ISD::SETGE:
4746           // True if the sign bit of C1 is set.
4747           return DAG.getConstant(C1.isNegative(), dl, VT);
4748         case ISD::SETLT:
4749         case ISD::SETLE:
4750           // True if the sign bit of C1 isn't set.
4751           return DAG.getConstant(C1.isNonNegative(), dl, VT);
4752         default:
4753           break;
4754         }
4755       }
4756 
4757       // Otherwise, we can perform the comparison with the low bits.
4758       switch (Cond) {
4759       case ISD::SETEQ:
4760       case ISD::SETNE:
4761       case ISD::SETUGT:
4762       case ISD::SETUGE:
4763       case ISD::SETULT:
4764       case ISD::SETULE: {
4765         EVT newVT = N0.getOperand(0).getValueType();
4766         // FIXME: Should use isNarrowingProfitable.
4767         if (DCI.isBeforeLegalizeOps() ||
4768             (isOperationLegal(ISD::SETCC, newVT) &&
4769              isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4770              isTypeDesirableForOp(ISD::SETCC, newVT))) {
4771           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4772           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4773 
4774           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4775                                           NewConst, Cond);
4776           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4777         }
4778         break;
4779       }
4780       default:
4781         break; // todo, be more careful with signed comparisons
4782       }
4783     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4784                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4785                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4786                                       OpVT)) {
4787       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4788       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4789       EVT ExtDstTy = N0.getValueType();
4790       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4791 
4792       // If the constant doesn't fit into the number of bits for the source of
4793       // the sign extension, it is impossible for both sides to be equal.
4794       if (C1.getSignificantBits() > ExtSrcTyBits)
4795         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4796 
4797       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4798              ExtDstTy != ExtSrcTy && "Unexpected types!");
4799       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4800       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4801                                    DAG.getConstant(Imm, dl, ExtDstTy));
4802       if (!DCI.isCalledByLegalizer())
4803         DCI.AddToWorklist(ZextOp.getNode());
4804       // Otherwise, make this a use of a zext.
4805       return DAG.getSetCC(dl, VT, ZextOp,
4806                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4807     } else if ((N1C->isZero() || N1C->isOne()) &&
4808                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4809       // SETCC (X), [0|1], [EQ|NE]  -> X if X is known 0/1. i1 types are
4810       // excluded as they are handled below whilst checking for foldBooleans.
4811       if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4812           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4813           (N0.getValueType() == MVT::i1 ||
4814            getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4815           DAG.MaskedValueIsZero(
4816               N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4817         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4818         if (TrueWhenTrue)
4819           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4820         // Invert the condition.
4821         if (N0.getOpcode() == ISD::SETCC) {
4822           ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4823           CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4824           if (DCI.isBeforeLegalizeOps() ||
4825               isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4826             return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4827         }
4828       }
4829 
4830       if ((N0.getOpcode() == ISD::XOR ||
4831            (N0.getOpcode() == ISD::AND &&
4832             N0.getOperand(0).getOpcode() == ISD::XOR &&
4833             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4834           isOneConstant(N0.getOperand(1))) {
4835         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4836         // can only do this if the top bits are known zero.
4837         unsigned BitWidth = N0.getValueSizeInBits();
4838         if (DAG.MaskedValueIsZero(N0,
4839                                   APInt::getHighBitsSet(BitWidth,
4840                                                         BitWidth-1))) {
4841           // Okay, get the un-inverted input value.
4842           SDValue Val;
4843           if (N0.getOpcode() == ISD::XOR) {
4844             Val = N0.getOperand(0);
4845           } else {
4846             assert(N0.getOpcode() == ISD::AND &&
4847                     N0.getOperand(0).getOpcode() == ISD::XOR);
4848             // ((X^1)&1)^1 -> X & 1
4849             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4850                               N0.getOperand(0).getOperand(0),
4851                               N0.getOperand(1));
4852           }
4853 
4854           return DAG.getSetCC(dl, VT, Val, N1,
4855                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4856         }
4857       } else if (N1C->isOne()) {
4858         SDValue Op0 = N0;
4859         if (Op0.getOpcode() == ISD::TRUNCATE)
4860           Op0 = Op0.getOperand(0);
4861 
4862         if ((Op0.getOpcode() == ISD::XOR) &&
4863             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4864             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4865           SDValue XorLHS = Op0.getOperand(0);
4866           SDValue XorRHS = Op0.getOperand(1);
4867           // Ensure that the input setccs return an i1 type or 0/1 value.
4868           if (Op0.getValueType() == MVT::i1 ||
4869               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4870                       ZeroOrOneBooleanContent &&
4871                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4872                         ZeroOrOneBooleanContent)) {
4873             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4874             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4875             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4876           }
4877         }
4878         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4879           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4880           if (Op0.getValueType().bitsGT(VT))
4881             Op0 = DAG.getNode(ISD::AND, dl, VT,
4882                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4883                           DAG.getConstant(1, dl, VT));
4884           else if (Op0.getValueType().bitsLT(VT))
4885             Op0 = DAG.getNode(ISD::AND, dl, VT,
4886                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4887                         DAG.getConstant(1, dl, VT));
4888 
4889           return DAG.getSetCC(dl, VT, Op0,
4890                               DAG.getConstant(0, dl, Op0.getValueType()),
4891                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4892         }
4893         if (Op0.getOpcode() == ISD::AssertZext &&
4894             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4895           return DAG.getSetCC(dl, VT, Op0,
4896                               DAG.getConstant(0, dl, Op0.getValueType()),
4897                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4898       }
4899     }
4900 
4901     // Given:
4902     //   icmp eq/ne (urem %x, %y), 0
4903     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4904     //   icmp eq/ne %x, 0
4905     if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4906         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4907       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4908       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4909       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4910         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4911     }
4912 
4913     // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4914     //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4915     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4916         N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4917         N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4918         N1C->isAllOnes()) {
4919       return DAG.getSetCC(dl, VT, N0.getOperand(0),
4920                           DAG.getConstant(0, dl, OpVT),
4921                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4922     }
4923 
4924     if (SDValue V =
4925             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4926       return V;
4927   }
4928 
4929   // These simplifications apply to splat vectors as well.
4930   // TODO: Handle more splat vector cases.
4931   if (auto *N1C = isConstOrConstSplat(N1)) {
4932     const APInt &C1 = N1C->getAPIntValue();
4933 
4934     APInt MinVal, MaxVal;
4935     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4936     if (ISD::isSignedIntSetCC(Cond)) {
4937       MinVal = APInt::getSignedMinValue(OperandBitSize);
4938       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4939     } else {
4940       MinVal = APInt::getMinValue(OperandBitSize);
4941       MaxVal = APInt::getMaxValue(OperandBitSize);
4942     }
4943 
4944     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4945     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4946       // X >= MIN --> true
4947       if (C1 == MinVal)
4948         return DAG.getBoolConstant(true, dl, VT, OpVT);
4949 
4950       if (!VT.isVector()) { // TODO: Support this for vectors.
4951         // X >= C0 --> X > (C0 - 1)
4952         APInt C = C1 - 1;
4953         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4954         if ((DCI.isBeforeLegalizeOps() ||
4955              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4956             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4957                                   isLegalICmpImmediate(C.getSExtValue())))) {
4958           return DAG.getSetCC(dl, VT, N0,
4959                               DAG.getConstant(C, dl, N1.getValueType()),
4960                               NewCC);
4961         }
4962       }
4963     }
4964 
4965     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4966       // X <= MAX --> true
4967       if (C1 == MaxVal)
4968         return DAG.getBoolConstant(true, dl, VT, OpVT);
4969 
4970       // X <= C0 --> X < (C0 + 1)
4971       if (!VT.isVector()) { // TODO: Support this for vectors.
4972         APInt C = C1 + 1;
4973         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4974         if ((DCI.isBeforeLegalizeOps() ||
4975              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4976             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4977                                   isLegalICmpImmediate(C.getSExtValue())))) {
4978           return DAG.getSetCC(dl, VT, N0,
4979                               DAG.getConstant(C, dl, N1.getValueType()),
4980                               NewCC);
4981         }
4982       }
4983     }
4984 
4985     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4986       if (C1 == MinVal)
4987         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4988 
4989       // TODO: Support this for vectors after legalize ops.
4990       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4991         // Canonicalize setlt X, Max --> setne X, Max
4992         if (C1 == MaxVal)
4993           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4994 
4995         // If we have setult X, 1, turn it into seteq X, 0
4996         if (C1 == MinVal+1)
4997           return DAG.getSetCC(dl, VT, N0,
4998                               DAG.getConstant(MinVal, dl, N0.getValueType()),
4999                               ISD::SETEQ);
5000       }
5001     }
5002 
5003     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5004       if (C1 == MaxVal)
5005         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5006 
5007       // TODO: Support this for vectors after legalize ops.
5008       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5009         // Canonicalize setgt X, Min --> setne X, Min
5010         if (C1 == MinVal)
5011           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5012 
5013         // If we have setugt X, Max-1, turn it into seteq X, Max
5014         if (C1 == MaxVal-1)
5015           return DAG.getSetCC(dl, VT, N0,
5016                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
5017                               ISD::SETEQ);
5018       }
5019     }
5020 
5021     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5022       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
5023       if (C1.isZero())
5024         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5025                 VT, N0, N1, Cond, DCI, dl))
5026           return CC;
5027 
5028       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5029       // For example, when high 32-bits of i64 X are known clear:
5030       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
5031       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
5032       bool CmpZero = N1C->isZero();
5033       bool CmpNegOne = N1C->isAllOnes();
5034       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5035         // Match or(lo,shl(hi,bw/2)) pattern.
5036         auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5037           unsigned EltBits = V.getScalarValueSizeInBits();
5038           if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5039             return false;
5040           SDValue LHS = V.getOperand(0);
5041           SDValue RHS = V.getOperand(1);
5042           APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5043           // Unshifted element must have zero upperbits.
5044           if (RHS.getOpcode() == ISD::SHL &&
5045               isa<ConstantSDNode>(RHS.getOperand(1)) &&
5046               RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5047               DAG.MaskedValueIsZero(LHS, HiBits)) {
5048             Lo = LHS;
5049             Hi = RHS.getOperand(0);
5050             return true;
5051           }
5052           if (LHS.getOpcode() == ISD::SHL &&
5053               isa<ConstantSDNode>(LHS.getOperand(1)) &&
5054               LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5055               DAG.MaskedValueIsZero(RHS, HiBits)) {
5056             Lo = RHS;
5057             Hi = LHS.getOperand(0);
5058             return true;
5059           }
5060           return false;
5061         };
5062 
5063         auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5064           unsigned EltBits = N0.getScalarValueSizeInBits();
5065           unsigned HalfBits = EltBits / 2;
5066           APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5067           SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5068           SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5069           SDValue NewN0 =
5070               DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5071           SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5072           return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5073         };
5074 
5075         SDValue Lo, Hi;
5076         if (IsConcat(N0, Lo, Hi))
5077           return MergeConcat(Lo, Hi);
5078 
5079         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5080           SDValue Lo0, Lo1, Hi0, Hi1;
5081           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5082               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5083             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5084                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5085           }
5086         }
5087       }
5088     }
5089 
5090     // If we have "setcc X, C0", check to see if we can shrink the immediate
5091     // by changing cc.
5092     // TODO: Support this for vectors after legalize ops.
5093     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5094       // SETUGT X, SINTMAX  -> SETLT X, 0
5095       // SETUGE X, SINTMIN -> SETLT X, 0
5096       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5097           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5098         return DAG.getSetCC(dl, VT, N0,
5099                             DAG.getConstant(0, dl, N1.getValueType()),
5100                             ISD::SETLT);
5101 
5102       // SETULT X, SINTMIN  -> SETGT X, -1
5103       // SETULE X, SINTMAX  -> SETGT X, -1
5104       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5105           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5106         return DAG.getSetCC(dl, VT, N0,
5107                             DAG.getAllOnesConstant(dl, N1.getValueType()),
5108                             ISD::SETGT);
5109     }
5110   }
5111 
5112   // Back to non-vector simplifications.
5113   // TODO: Can we do these for vector splats?
5114   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5115     const APInt &C1 = N1C->getAPIntValue();
5116     EVT ShValTy = N0.getValueType();
5117 
5118     // Fold bit comparisons when we can. This will result in an
5119     // incorrect value when boolean false is negative one, unless
5120     // the bitsize is 1 in which case the false value is the same
5121     // in practice regardless of the representation.
5122     if ((VT.getSizeInBits() == 1 ||
5123          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5124         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5125         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5126         N0.getOpcode() == ISD::AND) {
5127       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5128         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
5129           // Perform the xform if the AND RHS is a single bit.
5130           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5131           if (AndRHS->getAPIntValue().isPowerOf2() &&
5132               !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5133             return DAG.getNode(
5134                 ISD::TRUNCATE, dl, VT,
5135                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5136                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5137           }
5138         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5139           // (X & 8) == 8  -->  (X & 8) >> 3
5140           // Perform the xform if C1 is a single bit.
5141           unsigned ShCt = C1.logBase2();
5142           if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5143             return DAG.getNode(
5144                 ISD::TRUNCATE, dl, VT,
5145                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5146                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5147           }
5148         }
5149       }
5150     }
5151 
5152     if (C1.getSignificantBits() <= 64 &&
5153         !isLegalICmpImmediate(C1.getSExtValue())) {
5154       // (X & -256) == 256 -> (X >> 8) == 1
5155       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5156           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5157         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5158           const APInt &AndRHSC = AndRHS->getAPIntValue();
5159           if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5160             unsigned ShiftBits = AndRHSC.countr_zero();
5161             if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5162               SDValue Shift = DAG.getNode(
5163                   ISD::SRL, dl, ShValTy, N0.getOperand(0),
5164                   DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5165               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5166               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5167             }
5168           }
5169         }
5170       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5171                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5172         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5173         // X <  0x100000000 -> (X >> 32) <  1
5174         // X >= 0x100000000 -> (X >> 32) >= 1
5175         // X <= 0x0ffffffff -> (X >> 32) <  1
5176         // X >  0x0ffffffff -> (X >> 32) >= 1
5177         unsigned ShiftBits;
5178         APInt NewC = C1;
5179         ISD::CondCode NewCond = Cond;
5180         if (AdjOne) {
5181           ShiftBits = C1.countr_one();
5182           NewC = NewC + 1;
5183           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5184         } else {
5185           ShiftBits = C1.countr_zero();
5186         }
5187         NewC.lshrInPlace(ShiftBits);
5188         if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5189             isLegalICmpImmediate(NewC.getSExtValue()) &&
5190             !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5191           SDValue Shift =
5192               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5193                           DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5194           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5195           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5196         }
5197       }
5198     }
5199   }
5200 
5201   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5202     auto *CFP = cast<ConstantFPSDNode>(N1);
5203     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5204 
5205     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
5206     // constant if knowing that the operand is non-nan is enough.  We prefer to
5207     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5208     // materialize 0.0.
5209     if (Cond == ISD::SETO || Cond == ISD::SETUO)
5210       return DAG.getSetCC(dl, VT, N0, N0, Cond);
5211 
5212     // setcc (fneg x), C -> setcc swap(pred) x, -C
5213     if (N0.getOpcode() == ISD::FNEG) {
5214       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5215       if (DCI.isBeforeLegalizeOps() ||
5216           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5217         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5218         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5219       }
5220     }
5221 
5222     // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5223     if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5224         !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5225       bool IsFabs = N0.getOpcode() == ISD::FABS;
5226       SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5227       if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5228         FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5229                                              : (IsFabs ? fcInf : fcPosInf);
5230         if (Cond == ISD::SETUEQ)
5231           Flag |= fcNan;
5232         return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5233                            DAG.getTargetConstant(Flag, dl, MVT::i32));
5234       }
5235     }
5236 
5237     // If the condition is not legal, see if we can find an equivalent one
5238     // which is legal.
5239     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5240       // If the comparison was an awkward floating-point == or != and one of
5241       // the comparison operands is infinity or negative infinity, convert the
5242       // condition to a less-awkward <= or >=.
5243       if (CFP->getValueAPF().isInfinity()) {
5244         bool IsNegInf = CFP->getValueAPF().isNegative();
5245         ISD::CondCode NewCond = ISD::SETCC_INVALID;
5246         switch (Cond) {
5247         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5248         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5249         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5250         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5251         default: break;
5252         }
5253         if (NewCond != ISD::SETCC_INVALID &&
5254             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5255           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5256       }
5257     }
5258   }
5259 
5260   if (N0 == N1) {
5261     // The sext(setcc()) => setcc() optimization relies on the appropriate
5262     // constant being emitted.
5263     assert(!N0.getValueType().isInteger() &&
5264            "Integer types should be handled by FoldSetCC");
5265 
5266     bool EqTrue = ISD::isTrueWhenEqual(Cond);
5267     unsigned UOF = ISD::getUnorderedFlavor(Cond);
5268     if (UOF == 2) // FP operators that are undefined on NaNs.
5269       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5270     if (UOF == unsigned(EqTrue))
5271       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5272     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
5273     // if it is not already.
5274     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5275     if (NewCond != Cond &&
5276         (DCI.isBeforeLegalizeOps() ||
5277                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5278       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5279   }
5280 
5281   // ~X > ~Y --> Y > X
5282   // ~X < ~Y --> Y < X
5283   // ~X < C --> X > ~C
5284   // ~X > C --> X < ~C
5285   if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5286       N0.getValueType().isInteger()) {
5287     if (isBitwiseNot(N0)) {
5288       if (isBitwiseNot(N1))
5289         return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5290 
5291       if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5292           !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5293         SDValue Not = DAG.getNOT(dl, N1, OpVT);
5294         return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5295       }
5296     }
5297   }
5298 
5299   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5300       N0.getValueType().isInteger()) {
5301     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5302         N0.getOpcode() == ISD::XOR) {
5303       // Simplify (X+Y) == (X+Z) -->  Y == Z
5304       if (N0.getOpcode() == N1.getOpcode()) {
5305         if (N0.getOperand(0) == N1.getOperand(0))
5306           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5307         if (N0.getOperand(1) == N1.getOperand(1))
5308           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5309         if (isCommutativeBinOp(N0.getOpcode())) {
5310           // If X op Y == Y op X, try other combinations.
5311           if (N0.getOperand(0) == N1.getOperand(1))
5312             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5313                                 Cond);
5314           if (N0.getOperand(1) == N1.getOperand(0))
5315             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5316                                 Cond);
5317         }
5318       }
5319 
5320       // If RHS is a legal immediate value for a compare instruction, we need
5321       // to be careful about increasing register pressure needlessly.
5322       bool LegalRHSImm = false;
5323 
5324       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5325         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5326           // Turn (X+C1) == C2 --> X == C2-C1
5327           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5328             return DAG.getSetCC(
5329                 dl, VT, N0.getOperand(0),
5330                 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5331                                 dl, N0.getValueType()),
5332                 Cond);
5333 
5334           // Turn (X^C1) == C2 --> X == C1^C2
5335           if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5336             return DAG.getSetCC(
5337                 dl, VT, N0.getOperand(0),
5338                 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5339                                 dl, N0.getValueType()),
5340                 Cond);
5341         }
5342 
5343         // Turn (C1-X) == C2 --> X == C1-C2
5344         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5345           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5346             return DAG.getSetCC(
5347                 dl, VT, N0.getOperand(1),
5348                 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5349                                 dl, N0.getValueType()),
5350                 Cond);
5351 
5352         // Could RHSC fold directly into a compare?
5353         if (RHSC->getValueType(0).getSizeInBits() <= 64)
5354           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5355       }
5356 
5357       // (X+Y) == X --> Y == 0 and similar folds.
5358       // Don't do this if X is an immediate that can fold into a cmp
5359       // instruction and X+Y has other uses. It could be an induction variable
5360       // chain, and the transform would increase register pressure.
5361       if (!LegalRHSImm || N0.hasOneUse())
5362         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5363           return V;
5364     }
5365 
5366     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5367         N1.getOpcode() == ISD::XOR)
5368       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5369         return V;
5370 
5371     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5372       return V;
5373   }
5374 
5375   // Fold remainder of division by a constant.
5376   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5377       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5378     // When division is cheap or optimizing for minimum size,
5379     // fall through to DIVREM creation by skipping this fold.
5380     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5381       if (N0.getOpcode() == ISD::UREM) {
5382         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5383           return Folded;
5384       } else if (N0.getOpcode() == ISD::SREM) {
5385         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5386           return Folded;
5387       }
5388     }
5389   }
5390 
5391   // Fold away ALL boolean setcc's.
5392   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5393     SDValue Temp;
5394     switch (Cond) {
5395     default: llvm_unreachable("Unknown integer setcc!");
5396     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
5397       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5398       N0 = DAG.getNOT(dl, Temp, OpVT);
5399       if (!DCI.isCalledByLegalizer())
5400         DCI.AddToWorklist(Temp.getNode());
5401       break;
5402     case ISD::SETNE:  // X != Y   -->  (X^Y)
5403       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5404       break;
5405     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
5406     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
5407       Temp = DAG.getNOT(dl, N0, OpVT);
5408       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5409       if (!DCI.isCalledByLegalizer())
5410         DCI.AddToWorklist(Temp.getNode());
5411       break;
5412     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
5413     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
5414       Temp = DAG.getNOT(dl, N1, OpVT);
5415       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5416       if (!DCI.isCalledByLegalizer())
5417         DCI.AddToWorklist(Temp.getNode());
5418       break;
5419     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
5420     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
5421       Temp = DAG.getNOT(dl, N0, OpVT);
5422       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5423       if (!DCI.isCalledByLegalizer())
5424         DCI.AddToWorklist(Temp.getNode());
5425       break;
5426     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
5427     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
5428       Temp = DAG.getNOT(dl, N1, OpVT);
5429       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5430       break;
5431     }
5432     if (VT.getScalarType() != MVT::i1) {
5433       if (!DCI.isCalledByLegalizer())
5434         DCI.AddToWorklist(N0.getNode());
5435       // FIXME: If running after legalize, we probably can't do this.
5436       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5437       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5438     }
5439     return N0;
5440   }
5441 
5442   // Could not fold it.
5443   return SDValue();
5444 }
5445 
5446 /// Returns true (and the GlobalValue and the offset) if the node is a
5447 /// GlobalAddress + offset.
5448 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5449                                     int64_t &Offset) const {
5450 
5451   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5452 
5453   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5454     GA = GASD->getGlobal();
5455     Offset += GASD->getOffset();
5456     return true;
5457   }
5458 
5459   if (N->getOpcode() == ISD::ADD) {
5460     SDValue N1 = N->getOperand(0);
5461     SDValue N2 = N->getOperand(1);
5462     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5463       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5464         Offset += V->getSExtValue();
5465         return true;
5466       }
5467     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5468       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5469         Offset += V->getSExtValue();
5470         return true;
5471       }
5472     }
5473   }
5474 
5475   return false;
5476 }
5477 
/// Hook for targets to fold target-specific nodes during DAG combining.
/// Returning a null SDValue tells the combiner the node was left unchanged.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
5483 
5484 //===----------------------------------------------------------------------===//
5485 //  Inline Assembler Implementation Methods
5486 //===----------------------------------------------------------------------===//
5487 
5488 TargetLowering::ConstraintType
5489 TargetLowering::getConstraintType(StringRef Constraint) const {
5490   unsigned S = Constraint.size();
5491 
5492   if (S == 1) {
5493     switch (Constraint[0]) {
5494     default: break;
5495     case 'r':
5496       return C_RegisterClass;
5497     case 'm': // memory
5498     case 'o': // offsetable
5499     case 'V': // not offsetable
5500       return C_Memory;
5501     case 'p': // Address.
5502       return C_Address;
5503     case 'n': // Simple Integer
5504     case 'E': // Floating Point Constant
5505     case 'F': // Floating Point Constant
5506       return C_Immediate;
5507     case 'i': // Simple Integer or Relocatable Constant
5508     case 's': // Relocatable Constant
5509     case 'X': // Allow ANY value.
5510     case 'I': // Target registers.
5511     case 'J':
5512     case 'K':
5513     case 'L':
5514     case 'M':
5515     case 'N':
5516     case 'O':
5517     case 'P':
5518     case '<':
5519     case '>':
5520       return C_Other;
5521     }
5522   }
5523 
5524   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5525     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5526       return C_Memory;
5527     return C_Register;
5528   }
5529   return C_Unknown;
5530 }
5531 
5532 /// Try to replace an X constraint, which matches anything, with another that
5533 /// has more specific requirements based on the type of the corresponding
5534 /// operand.
5535 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5536   if (ConstraintVT.isInteger())
5537     return "r";
5538   if (ConstraintVT.isFloatingPoint())
5539     return "f"; // works for many targets
5540   return nullptr;
5541 }
5542 
/// Hook allowing a target to lower an inline-asm output operand itself.
/// The default implementation declines by returning a null SDValue, leaving
/// the output to the generic lowering code.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}
5548 
5549 /// Lower the specified operand into the Ops vector.
5550 /// If it is invalid, don't add anything to Ops.
5551 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5552                                                   StringRef Constraint,
5553                                                   std::vector<SDValue> &Ops,
5554                                                   SelectionDAG &DAG) const {
5555 
5556   if (Constraint.size() > 1)
5557     return;
5558 
5559   char ConstraintLetter = Constraint[0];
5560   switch (ConstraintLetter) {
5561   default: break;
5562   case 'X':    // Allows any operand
5563   case 'i':    // Simple Integer or Relocatable Constant
5564   case 'n':    // Simple Integer
5565   case 's': {  // Relocatable Constant
5566 
5567     ConstantSDNode *C;
5568     uint64_t Offset = 0;
5569 
5570     // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5571     // etc., since getelementpointer is variadic. We can't use
5572     // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5573     // while in this case the GA may be furthest from the root node which is
5574     // likely an ISD::ADD.
5575     while (true) {
5576       if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5577         // gcc prints these as sign extended.  Sign extend value to 64 bits
5578         // now; without this it would get ZExt'd later in
5579         // ScheduleDAGSDNodes::EmitNode, which is very generic.
5580         bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5581         BooleanContent BCont = getBooleanContents(MVT::i64);
5582         ISD::NodeType ExtOpc =
5583             IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5584         int64_t ExtVal =
5585             ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5586         Ops.push_back(
5587             DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5588         return;
5589       }
5590       if (ConstraintLetter != 'n') {
5591         if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5592           Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5593                                                    GA->getValueType(0),
5594                                                    Offset + GA->getOffset()));
5595           return;
5596         }
5597         if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5598           Ops.push_back(DAG.getTargetBlockAddress(
5599               BA->getBlockAddress(), BA->getValueType(0),
5600               Offset + BA->getOffset(), BA->getTargetFlags()));
5601           return;
5602         }
5603         if (isa<BasicBlockSDNode>(Op)) {
5604           Ops.push_back(Op);
5605           return;
5606         }
5607       }
5608       const unsigned OpCode = Op.getOpcode();
5609       if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5610         if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5611           Op = Op.getOperand(1);
5612         // Subtraction is not commutative.
5613         else if (OpCode == ISD::ADD &&
5614                  (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5615           Op = Op.getOperand(0);
5616         else
5617           return;
5618         Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5619         continue;
5620       }
5621       return;
5622     }
5623     break;
5624   }
5625   }
5626 }
5627 
/// Hook for targets to append extra operands when building a target
/// intrinsic node. The default implementation adds nothing.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5631 
5632 std::pair<unsigned, const TargetRegisterClass *>
5633 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5634                                              StringRef Constraint,
5635                                              MVT VT) const {
5636   if (!Constraint.starts_with("{"))
5637     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5638   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5639 
5640   // Remove the braces from around the name.
5641   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5642 
5643   std::pair<unsigned, const TargetRegisterClass *> R =
5644       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5645 
5646   // Figure out which register class contains this reg.
5647   for (const TargetRegisterClass *RC : RI->regclasses()) {
5648     // If none of the value types for this register class are valid, we
5649     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5650     if (!isLegalRC(*RI, *RC))
5651       continue;
5652 
5653     for (const MCPhysReg &PR : *RC) {
5654       if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5655         std::pair<unsigned, const TargetRegisterClass *> S =
5656             std::make_pair(PR, RC);
5657 
5658         // If this register class has the requested value type, return it,
5659         // otherwise keep searching and return the first class found
5660         // if no other is found which explicitly has the requested type.
5661         if (RI->isTypeLegalForClass(*RC, VT))
5662           return S;
5663         if (!R.second)
5664           R = S;
5665       }
5666     }
5667   }
5668 
5669   return R;
5670 }
5671 
5672 //===----------------------------------------------------------------------===//
5673 // Constraint Selection.
5674 
5675 /// Return true of this is an input operand that is a matching constraint like
5676 /// "4".
5677 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5678   assert(!ConstraintCode.empty() && "No known constraint!");
5679   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5680 }
5681 
5682 /// If this is an input matching constraint, this method returns the output
5683 /// operand it matches.
5684 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5685   assert(!ConstraintCode.empty() && "No known constraint!");
5686   return atoi(ConstraintCode.c_str());
5687 }
5688 
5689 /// Split up the constraint string from the inline assembly value into the
5690 /// specific constraints and their prefixes, and also tie in the associated
5691 /// operand values.
5692 /// If this returns an empty vector, and if the constraint string itself
5693 /// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple results are returned as a struct; each output constraint
        // corresponds to one struct element.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      // Labels don't consume call arguments; skip the VT computation below.
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Derive ConstraintVT for operands that carry an IR value.
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are pointers; the pointee type comes from the
        // mandatory elementtype attribute.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One unmatched operand invalidates the whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        // Tied operands must both be int-ish or both FP-ish and resolve to
        // the same register class, otherwise they cannot share a register.
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5878 
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
5886 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5887   switch (CT) {
5888   case TargetLowering::C_Immediate:
5889   case TargetLowering::C_Other:
5890     return 4;
5891   case TargetLowering::C_Memory:
5892   case TargetLowering::C_Address:
5893     return 3;
5894   case TargetLowering::C_RegisterClass:
5895     return 2;
5896   case TargetLowering::C_Register:
5897     return 1;
5898   case TargetLowering::C_Unknown:
5899     return 0;
5900   }
5901   llvm_unreachable("Invalid constraint type");
5902 }
5903 
5904 /// Examine constraint type and operand type and determine a weight value.
5905 /// This object must already have been set up with the operand type
5906 /// and the current alternative constraint selected.
5907 TargetLowering::ConstraintWeight
5908   TargetLowering::getMultipleConstraintMatchWeight(
5909     AsmOperandInfo &info, int maIndex) const {
5910   InlineAsm::ConstraintCodeVector *rCodes;
5911   if (maIndex >= (int)info.multipleAlternatives.size())
5912     rCodes = &info.Codes;
5913   else
5914     rCodes = &info.multipleAlternatives[maIndex].Codes;
5915   ConstraintWeight BestWeight = CW_Invalid;
5916 
5917   // Loop over the options, keeping track of the most general one.
5918   for (const std::string &rCode : *rCodes) {
5919     ConstraintWeight weight =
5920         getSingleConstraintMatchWeight(info, rCode.c_str());
5921     if (weight > BestWeight)
5922       BestWeight = weight;
5923   }
5924 
5925   return BestWeight;
5926 }
5927 
5928 /// Examine constraint type and operand type and determine a weight value.
5929 /// This object must already have been set up with the operand type
5930 /// and the current alternative constraint selected.
5931 TargetLowering::ConstraintWeight
5932   TargetLowering::getSingleConstraintMatchWeight(
5933     AsmOperandInfo &info, const char *constraint) const {
5934   ConstraintWeight weight = CW_Invalid;
5935   Value *CallOperandVal = info.CallOperandVal;
5936     // If we don't have a value, we can't do a match,
5937     // but allow it at the lowest weight.
5938   if (!CallOperandVal)
5939     return CW_Default;
5940   // Look at the constraint type.
5941   switch (*constraint) {
5942     case 'i': // immediate integer.
5943     case 'n': // immediate integer with a known value.
5944       if (isa<ConstantInt>(CallOperandVal))
5945         weight = CW_Constant;
5946       break;
5947     case 's': // non-explicit intregal immediate.
5948       if (isa<GlobalValue>(CallOperandVal))
5949         weight = CW_Constant;
5950       break;
5951     case 'E': // immediate float if host format.
5952     case 'F': // immediate float.
5953       if (isa<ConstantFP>(CallOperandVal))
5954         weight = CW_Constant;
5955       break;
5956     case '<': // memory operand with autodecrement.
5957     case '>': // memory operand with autoincrement.
5958     case 'm': // memory operand.
5959     case 'o': // offsettable memory operand
5960     case 'V': // non-offsettable memory operand
5961       weight = CW_Memory;
5962       break;
5963     case 'r': // general register.
5964     case 'g': // general register, memory operand or immediate integer.
5965               // note: Clang converts "g" to "imr".
5966       if (CallOperandVal->getType()->isIntegerTy())
5967         weight = CW_Register;
5968       break;
5969     case 'X': // any operand.
5970   default:
5971     weight = CW_Default;
5972     break;
5973   }
5974   return weight;
5975 }
5976 
5977 /// If there are multiple different constraints that we could pick for this
5978 /// operand (e.g. "imr") try to pick the 'best' one.
5979 /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5980 /// into seven classes:
5981 ///    Register      -> one specific register
5982 ///    RegisterClass -> a group of regs
5983 ///    Memory        -> memory
5984 ///    Address       -> a symbolic memory reference
5985 ///    Immediate     -> immediate values
5986 ///    Other         -> magic values (such as "Flag Output Operands")
5987 ///    Unknown       -> something we don't recognize yet and can't handle
5988 /// Ideally, we would pick the most specific constraint possible: if we have
5989 /// something that fits into a register, we would pick it.  The problem here
5990 /// is that if we have something that could either be in a register or in
5991 /// memory that use of the register could cause selection of *other*
5992 /// operands to fail: they might only succeed if we pick memory.  Because of
5993 /// this the heuristic we use is:
5994 ///
5995 ///  1) If there is an 'other' constraint, and if the operand is valid for
5996 ///     that constraint, use it.  This makes us take advantage of 'i'
5997 ///     constraints when available.
5998 ///  2) Otherwise, pick the most general constraint present.  This prefers
5999 ///     'm' over 'r', for example.
6000 ///
6001 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6002     TargetLowering::AsmOperandInfo &OpInfo) const {
6003   ConstraintGroup Ret;
6004 
6005   Ret.reserve(OpInfo.Codes.size());
6006   for (StringRef Code : OpInfo.Codes) {
6007     TargetLowering::ConstraintType CType = getConstraintType(Code);
6008 
6009     // Indirect 'other' or 'immediate' constraints are not allowed.
6010     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6011                                CType == TargetLowering::C_Register ||
6012                                CType == TargetLowering::C_RegisterClass))
6013       continue;
6014 
6015     // Things with matching constraints can only be registers, per gcc
6016     // documentation.  This mainly affects "g" constraints.
6017     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6018       continue;
6019 
6020     Ret.emplace_back(Code, CType);
6021   }
6022 
6023   std::stable_sort(
6024       Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6025         return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6026       });
6027 
6028   return Ret;
6029 }
6030 
6031 /// If we have an immediate, see if we can lower it. Return true if we can,
6032 /// false otherwise.
6033 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6034                                      SDValue Op, SelectionDAG *DAG,
6035                                      const TargetLowering &TLI) {
6036 
6037   assert((P.second == TargetLowering::C_Other ||
6038           P.second == TargetLowering::C_Immediate) &&
6039          "need immediate or other");
6040 
6041   if (!Op.getNode())
6042     return false;
6043 
6044   std::vector<SDValue> ResultOps;
6045   TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6046   return !ResultOps.empty();
6047 }
6048 
6049 /// Determines the constraint code and constraint type to use for the specific
6050 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    // Multiple codes: rank them, then pick the first usable one.
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    unsigned BestIdx = 0;
    // Immediate/'other' kinds sort to the front; skip over the ones whose
    // operand cannot actually be lowered as an immediate.  If none can, fall
    // back to the first (most preferred) entry.
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Block labels and block addresses lower as immediates.
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6106 
6107 /// Given an exact SDIV by a constant, create a multiplication
6108 /// with the multiplicative inverse of the constant.
6109 /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6110 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6111                               const SDLoc &dl, SelectionDAG &DAG,
6112                               SmallVectorImpl<SDNode *> &Created) {
6113   SDValue Op0 = N->getOperand(0);
6114   SDValue Op1 = N->getOperand(1);
6115   EVT VT = N->getValueType(0);
6116   EVT SVT = VT.getScalarType();
6117   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6118   EVT ShSVT = ShVT.getScalarType();
6119 
6120   bool UseSRA = false;
6121   SmallVector<SDValue, 16> Shifts, Factors;
6122 
6123   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6124     if (C->isZero())
6125       return false;
6126     APInt Divisor = C->getAPIntValue();
6127     unsigned Shift = Divisor.countr_zero();
6128     if (Shift) {
6129       Divisor.ashrInPlace(Shift);
6130       UseSRA = true;
6131     }
6132     APInt Factor = Divisor.multiplicativeInverse();
6133     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6134     Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6135     return true;
6136   };
6137 
6138   // Collect all magic values from the build vector.
6139   if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6140     return SDValue();
6141 
6142   SDValue Shift, Factor;
6143   if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6144     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6145     Factor = DAG.getBuildVector(VT, dl, Factors);
6146   } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6147     assert(Shifts.size() == 1 && Factors.size() == 1 &&
6148            "Expected matchUnaryPredicate to return one element for scalable "
6149            "vectors");
6150     Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6151     Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6152   } else {
6153     assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6154     Shift = Shifts[0];
6155     Factor = Factors[0];
6156   }
6157 
6158   SDValue Res = Op0;
6159   if (UseSRA) {
6160     SDNodeFlags Flags;
6161     Flags.setExact(true);
6162     Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
6163     Created.push_back(Res.getNode());
6164   }
6165 
6166   return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6167 }
6168 
6169 /// Given an exact UDIV by a constant, create a multiplication
6170 /// with the multiplicative inverse of the constant.
6171 /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6172 static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6173                               const SDLoc &dl, SelectionDAG &DAG,
6174                               SmallVectorImpl<SDNode *> &Created) {
6175   EVT VT = N->getValueType(0);
6176   EVT SVT = VT.getScalarType();
6177   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6178   EVT ShSVT = ShVT.getScalarType();
6179 
6180   bool UseSRL = false;
6181   SmallVector<SDValue, 16> Shifts, Factors;
6182 
6183   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6184     if (C->isZero())
6185       return false;
6186     APInt Divisor = C->getAPIntValue();
6187     unsigned Shift = Divisor.countr_zero();
6188     if (Shift) {
6189       Divisor.lshrInPlace(Shift);
6190       UseSRL = true;
6191     }
6192     // Calculate the multiplicative inverse modulo BW.
6193     APInt Factor = Divisor.multiplicativeInverse();
6194     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6195     Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6196     return true;
6197   };
6198 
6199   SDValue Op1 = N->getOperand(1);
6200 
6201   // Collect all magic values from the build vector.
6202   if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6203     return SDValue();
6204 
6205   SDValue Shift, Factor;
6206   if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6207     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6208     Factor = DAG.getBuildVector(VT, dl, Factors);
6209   } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6210     assert(Shifts.size() == 1 && Factors.size() == 1 &&
6211            "Expected matchUnaryPredicate to return one element for scalable "
6212            "vectors");
6213     Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6214     Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6215   } else {
6216     assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6217     Shift = Shifts[0];
6218     Factor = Factors[0];
6219   }
6220 
6221   SDValue Res = N->getOperand(0);
6222   if (UseSRL) {
6223     SDNodeFlags Flags;
6224     Flags.setExact(true);
6225     Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, Flags);
6226     Created.push_back(Res.getNode());
6227   }
6228 
6229   return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6230 }
6231 
6232 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6233                               SelectionDAG &DAG,
6234                               SmallVectorImpl<SDNode *> &Created) const {
6235   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6236   if (isIntDivCheap(N->getValueType(0), Attr))
6237     return SDValue(N, 0); // Lower SDIV as SDIV
6238   return SDValue();
6239 }
6240 
6241 SDValue
6242 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6243                               SelectionDAG &DAG,
6244                               SmallVectorImpl<SDNode *> &Created) const {
6245   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6246   if (isIntDivCheap(N->getValueType(0), Attr))
6247     return SDValue(N, 0); // Lower SREM as SREM
6248   return SDValue();
6249 }
6250 
6251 /// Build sdiv by power-of-2 with conditional move instructions
6252 /// Ref: "Hacker's Delight" by Henry Warren 10-1
6253 /// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6254 ///   bgez x, label
6255 ///   add x, x, 2**k-1
6256 /// label:
6257 ///   sra res, x, k
6258 ///   neg res, res (when the divisor is negative)
6259 SDValue TargetLowering::buildSDIVPow2WithCMov(
6260     SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6261     SmallVectorImpl<SDNode *> &Created) const {
6262   unsigned Lg2 = Divisor.countr_zero();
6263   EVT VT = N->getValueType(0);
6264 
6265   SDLoc DL(N);
6266   SDValue N0 = N->getOperand(0);
6267   SDValue Zero = DAG.getConstant(0, DL, VT);
6268   APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6269   SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6270 
6271   // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6272   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6273   SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6274   SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6275   SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6276 
6277   Created.push_back(Cmp.getNode());
6278   Created.push_back(Add.getNode());
6279   Created.push_back(CMov.getNode());
6280 
6281   // Divide by pow2.
6282   SDValue SRA =
6283       DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6284 
6285   // If we're dividing by a positive value, we're done.  Otherwise, we must
6286   // negate the result.
6287   if (Divisor.isNonNegative())
6288     return SRA;
6289 
6290   Created.push_back(SRA.getNode());
6291   return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6292 }
6293 
6294 /// Given an ISD::SDIV node expressing a divide by constant,
6295 /// return a DAG expression to select that will generate the same value by
6296 /// multiplying by a magic number.
6297 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT; // Wider type used when VT itself is not legal.

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must hold a full double-width product.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Per-element: compute the magic multiplier, numerator add/sub factor,
  // post-shift amount, and sign-fixup mask.  Rejects zero divisors.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Materialize the collected per-element values as vectors (or scalars).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1); // Result 1 is the high half.
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
         isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No way to compute a high-half multiply.
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6461 
6462 /// Given an ISD::UDIV node expressing a divide by constant,
6463 /// return a DAG expression to select that will generate the same value by
6464 /// multiplying by a magic number.
6465 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type to perform the multiply in when VT itself is not legal but is
  // promoted to a type with a legal MUL; set below, only used in that case.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be wide enough to hold the full
    // EltBits x EltBits product, and MUL must be legal on it.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  // Per-element flags: whether any lane needs the NPQ (add-indicator) fixup,
  // a pre-shift, or a post-shift.
  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Computes the magic constants for one divisor element and appends them to
  // the vectors above. Returns false to abort the whole transform (zero
  // divisor).
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      // Bogus lane values; the final select below picks N0 for these lanes.
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(
              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // For NPQ lanes, a MULHU by 2^(EltBits-1) acts as a SRL-by-1; non-NPQ
      // lanes multiply by zero (see the UseNPQ block below).
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    // NPQFactor is intentionally not set here: the scalar NPQ path below uses
    // an explicit SRL-by-1 instead of a MULHU by NPQFactor.
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // Returns the high half of X*Y (zero-extended product), or SDValue() if no
  // legal way to compute it exists for this type.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
         isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    // NPQ fixup: Q += (N0 - Q) >> 1 for lanes where the magic multiplier
    // required the "add" form of the algorithm.
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Magic algorithm is invalid for divide-by-1 lanes; select the original
  // numerator for those lanes.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6661 
/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values that *do* match the predicate with
/// that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match the predicate with the AlternativeReplacement value.
6666 static void
6667 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6668                           std::function<bool(SDValue)> Predicate,
6669                           SDValue AlternativeReplacement = SDValue()) {
6670   SDValue Replacement;
6671   // Is there a value for which the Predicate does *NOT* match? What is it?
6672   auto SplatValue = llvm::find_if_not(Values, Predicate);
6673   if (SplatValue != Values.end()) {
6674     // Does Values consist only of SplatValue's and values matching Predicate?
6675     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6676           return Value == *SplatValue || Predicate(Value);
6677         })) // Then we shall replace values matching predicate with SplatValue.
6678       Replacement = *SplatValue;
6679   }
6680   if (!Replacement) {
6681     // Oops, we did not find the "baseline" splat value.
6682     if (!AlternativeReplacement)
6683       return; // Nothing to do.
6684     // Let's replace with provided value then.
6685     Replacement = AlternativeReplacement;
6686   }
6687   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6688 }
6689 
6690 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6691 /// where the divisor is constant and the comparison target is zero,
6692 /// return a DAG expression that will generate the same comparison result
6693 /// using only multiplications, additions and shifts/rotations.
6694 /// Ref: "Hacker's Delight" 10-17.
6695 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6696                                         SDValue CompTargetNode,
6697                                         ISD::CondCode Cond,
6698                                         DAGCombinerInfo &DCI,
6699                                         const SDLoc &DL) const {
6700   SmallVector<SDNode *, 5> Built;
6701   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6702                                          DCI, DL, Built)) {
6703     for (SDNode *N : Built)
6704       DCI.AddToWorklist(N);
6705     return Folded;
6706   }
6707 
6708   return SDValue();
6709 }
6710 
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Per-lane bookkeeping used to decide whether the fold is profitable and
  // which fixups (subtract, rotate, lane inversion) are needed.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Computes P, K and Q for one (divisor, comparison-target) lane pair and
  // appends them to the vectors above. Returns false to abort the whole fold.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnes(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to a bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
                              /*implicitTrunc=*/true),
                        DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    // Scalar case: use the single computed constants directly.
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops – legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above VSELECT above.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
6931 
6932 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6933 /// where the divisor is constant and the comparison target is zero,
6934 /// return a DAG expression that will generate the same comparison result
6935 /// using only multiplications, additions and shifts/rotations.
6936 /// Ref: "Hacker's Delight" 10-17.
6937 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6938                                         SDValue CompTargetNode,
6939                                         ISD::CondCode Cond,
6940                                         DAGCombinerInfo &DCI,
6941                                         const SDLoc &DL) const {
6942   SmallVector<SDNode *, 7> Built;
6943   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6944                                          DCI, DL, Built)) {
6945     assert(Built.size() <= 7 && "Max size prediction failed.");
6946     for (SDNode *N : Built)
6947       DCI.AddToWorklist(N);
6948     return Folded;
6949   }
6950 
6951   return SDValue();
6952 }
6953 
6954 SDValue
6955 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6956                                   SDValue CompTargetNode, ISD::CondCode Cond,
6957                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6958                                   SmallVectorImpl<SDNode *> &Created) const {
6959   // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6960   // Fold:
6961   //   (seteq/ne (srem N, D), 0)
6962   // To:
6963   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
6964   //
6965   // - D must be constant, with D = D0 * 2^K where D0 is odd
6966   // - P is the multiplicative inverse of D0 modulo 2^W
6967   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6968   // - Q = floor((2 * A) / (2^K))
6969   // where W is the width of the common type of N and D.
6970   //
6971   // When D is a power of two (and thus D0 is 1), the normal
6972   // formula for A and Q don't apply, because the derivation
6973   // depends on D not dividing 2^(W-1), and thus theorem ZRS
6974   // does not apply. This specifically fails when N = INT_MIN.
6975   //
6976   // Instead, for power-of-two D, we use:
6977   // - A = 2^(W-1)
6978   // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6979   // - Q = 2^(W-K) - 1
6980   // |-> Test that the top K bits are zero after rotation
6981   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6982          "Only applicable for (in)equality comparisons.");
6983 
6984   SelectionDAG &DAG = DCI.DAG;
6985 
6986   EVT VT = REMNode.getValueType();
6987   EVT SVT = VT.getScalarType();
6988   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6989   EVT ShSVT = ShVT.getScalarType();
6990 
6991   // If we are after ops legalization, and MUL is unavailable, we can not
6992   // proceed.
6993   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6994     return SDValue();
6995 
6996   // TODO: Could support comparing with non-zero too.
6997   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6998   if (!CompTarget || !CompTarget->isZero())
6999     return SDValue();
7000 
7001   bool HadIntMinDivisor = false;
7002   bool HadOneDivisor = false;
7003   bool AllDivisorsAreOnes = true;
7004   bool HadEvenDivisor = false;
7005   bool NeedToApplyOffset = false;
7006   bool AllDivisorsArePowerOfTwo = true;
7007   SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7008 
7009   auto BuildSREMPattern = [&](ConstantSDNode *C) {
7010     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7011     if (C->isZero())
7012       return false;
7013 
7014     // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7015 
7016     // WARNING: this fold is only valid for positive divisors!
7017     APInt D = C->getAPIntValue();
7018     if (D.isNegative())
7019       D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`
7020 
7021     HadIntMinDivisor |= D.isMinSignedValue();
7022 
7023     // If all divisors are ones, we will prefer to avoid the fold.
7024     HadOneDivisor |= D.isOne();
7025     AllDivisorsAreOnes &= D.isOne();
7026 
7027     // Decompose D into D0 * 2^K
7028     unsigned K = D.countr_zero();
7029     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7030     APInt D0 = D.lshr(K);
7031 
7032     if (!D.isMinSignedValue()) {
7033       // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7034       // we don't care about this lane in this fold, we'll special-handle it.
7035       HadEvenDivisor |= (K != 0);
7036     }
7037 
7038     // D is a power-of-two if D0 is one. This includes INT_MIN.
7039     // If all divisors are power-of-two, we will prefer to avoid the fold.
7040     AllDivisorsArePowerOfTwo &= D0.isOne();
7041 
7042     // P = inv(D0, 2^W)
7043     // 2^W requires W + 1 bits, so we have to extend and then truncate.
7044     unsigned W = D.getBitWidth();
7045     APInt P = D0.multiplicativeInverse();
7046     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7047 
7048     // A = floor((2^(W - 1) - 1) / D0) & -2^K
7049     APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7050     A.clearLowBits(K);
7051 
7052     if (!D.isMinSignedValue()) {
7053       // If divisor INT_MIN, then we don't care about this lane in this fold,
7054       // we'll special-handle it.
7055       NeedToApplyOffset |= A != 0;
7056     }
7057 
7058     // Q = floor((2 * A) / (2^K))
7059     APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7060 
7061     assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7062            "We are expecting that A is always less than all-ones for SVT");
7063     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7064            "We are expecting that K is always less than all-ones for ShSVT");
7065 
7066     // If D was a power of two, apply the alternate constant derivation.
7067     if (D0.isOne()) {
7068       // A = 2^(W-1)
7069       A = APInt::getSignedMinValue(W);
7070       // - Q = 2^(W-K) - 1
7071       Q = APInt::getAllOnes(W - K).zext(W);
7072     }
7073 
7074     // If the divisor is 1 the result can be constant-folded. Likewise, we
7075     // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7076     if (D.isOne()) {
7077       // Set P, A and K to a bogus values so we can try to splat them.
7078       P = 0;
7079       A = -1;
7080       K = -1;
7081 
7082       // x ?% 1 == 0  <-->  true  <-->  x u<= -1
7083       Q = -1;
7084     }
7085 
7086     PAmts.push_back(DAG.getConstant(P, DL, SVT));
7087     AAmts.push_back(DAG.getConstant(A, DL, SVT));
7088     KAmts.push_back(
7089         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7090                               /*implicitTrunc=*/true),
7091                         DL, ShSVT));
7092     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7093     return true;
7094   };
7095 
7096   SDValue N = REMNode.getOperand(0);
7097   SDValue D = REMNode.getOperand(1);
7098 
7099   // Collect the values from each element.
7100   if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7101     return SDValue();
7102 
7103   // If this is a srem by a one, avoid the fold since it can be constant-folded.
7104   if (AllDivisorsAreOnes)
7105     return SDValue();
7106 
7107   // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7108   // since it can be best implemented as a bit test.
7109   if (AllDivisorsArePowerOfTwo)
7110     return SDValue();
7111 
7112   SDValue PVal, AVal, KVal, QVal;
7113   if (D.getOpcode() == ISD::BUILD_VECTOR) {
7114     if (HadOneDivisor) {
7115       // Try to turn PAmts into a splat, since we don't care about the values
7116       // that are currently '0'. If we can't, just keep '0'`s.
7117       turnVectorIntoSplatVector(PAmts, isNullConstant);
7118       // Try to turn AAmts into a splat, since we don't care about the
7119       // values that are currently '-1'. If we can't, change them to '0'`s.
7120       turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7121                                 DAG.getConstant(0, DL, SVT));
7122       // Try to turn KAmts into a splat, since we don't care about the values
7123       // that are currently '-1'. If we can't, change them to '0'`s.
7124       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7125                                 DAG.getConstant(0, DL, ShSVT));
7126     }
7127 
7128     PVal = DAG.getBuildVector(VT, DL, PAmts);
7129     AVal = DAG.getBuildVector(VT, DL, AAmts);
7130     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7131     QVal = DAG.getBuildVector(VT, DL, QAmts);
7132   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7133     assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7134            QAmts.size() == 1 &&
7135            "Expected matchUnaryPredicate to return one element for scalable "
7136            "vectors");
7137     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7138     AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7139     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7140     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7141   } else {
7142     assert(isa<ConstantSDNode>(D) && "Expected a constant");
7143     PVal = PAmts[0];
7144     AVal = AAmts[0];
7145     KVal = KAmts[0];
7146     QVal = QAmts[0];
7147   }
7148 
7149   // (mul N, P)
7150   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7151   Created.push_back(Op0.getNode());
7152 
7153   if (NeedToApplyOffset) {
7154     // We need ADD to do this.
7155     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7156       return SDValue();
7157 
7158     // (add (mul N, P), A)
7159     Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7160     Created.push_back(Op0.getNode());
7161   }
7162 
7163   // Rotate right only if any divisor was even. We avoid rotates for all-odd
7164   // divisors as a performance improvement, since rotating by 0 is a no-op.
7165   if (HadEvenDivisor) {
7166     // We need ROTR to do this.
7167     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7168       return SDValue();
7169     // SREM: (rotr (add (mul N, P), A), K)
7170     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7171     Created.push_back(Op0.getNode());
7172   }
7173 
7174   // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7175   SDValue Fold =
7176       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7177                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7178 
7179   // If we didn't have lanes with INT_MIN divisor, then we're done.
7180   if (!HadIntMinDivisor)
7181     return Fold;
7182 
7183   // That fold is only valid for positive divisors. Which effectively means,
7184   // it is invalid for INT_MIN divisors. So if we have such a lane,
7185   // we must fix-up results for said lanes.
7186   assert(VT.isVector() && "Can/should only get here for vectors.");
7187 
7188   // NOTE: we avoid letting illegal types through even if we're before legalize
7189   // ops – legalization has a hard time producing good code for the code that
7190   // follows.
7191   if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7192       !isOperationLegalOrCustom(ISD::AND, VT) ||
7193       !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7194       !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7195     return SDValue();
7196 
7197   Created.push_back(Fold.getNode());
7198 
7199   SDValue IntMin = DAG.getConstant(
7200       APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7201   SDValue IntMax = DAG.getConstant(
7202       APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7203   SDValue Zero =
7204       DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7205 
7206   // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7207   SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7208   Created.push_back(DivisorIsIntMin.getNode());
7209 
7210   // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
7211   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7212   Created.push_back(Masked.getNode());
7213   SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7214   Created.push_back(MaskedIsZero.getNode());
7215 
7216   // To produce final result we need to blend 2 vectors: 'SetCC' and
7217   // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7218   // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7219   // constant-folded, select can get lowered to a shuffle with constant mask.
7220   SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7221                                 MaskedIsZero, Fold);
7222 
7223   return Blended;
7224 }
7225 
7226 bool TargetLowering::
7227 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7228   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7229     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7230                                 "be a constant integer");
7231     return true;
7232   }
7233 
7234   return false;
7235 }
7236 
7237 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7238                                          const DenormalMode &Mode) const {
7239   SDLoc DL(Op);
7240   EVT VT = Op.getValueType();
7241   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7242   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7243 
7244   // This is specifically a check for the handling of denormal inputs, not the
7245   // result.
7246   if (Mode.Input == DenormalMode::PreserveSign ||
7247       Mode.Input == DenormalMode::PositiveZero) {
7248     // Test = X == 0.0
7249     return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7250   }
7251 
7252   // Testing it with denormal inputs to avoid wrong estimate.
7253   //
7254   // Test = fabs(X) < SmallestNormal
7255   const fltSemantics &FltSem = VT.getFltSemantics();
7256   APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7257   SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7258   SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7259   return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7260 }
7261 
/// Return a negated form of \p Op if one can be produced profitably, setting
/// \p Cost to how that form compares with the original (Cheaper / Neutral /
/// Expensive). Returns an empty SDValue when no acceptable negation exists.
/// \p LegalOps, when true, restricts new nodes to legal/custom operations;
/// \p Depth bounds the recursion.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Drop a speculatively-created negated node that ended up unused.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Negate each (non-undef) element individually.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // Negating an add is only safe when signed zeros may be ignored.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      // The unused alternative may now be dead; drop it (unless it is N).
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    // Both foldings below negate Z, so Z must be negatable at all.
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      // Report the cheaper of the two negation costs that were combined.
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // These commute with negation: negate the operand and rewrap.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // Same, but FP_ROUND's second operand (the no-loss flag) is carried over.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7565 
7566 //===----------------------------------------------------------------------===//
7567 // Legalization Utilities
7568 //===----------------------------------------------------------------------===//
7569 
/// Expand a MUL, UMUL_LOHI or SMUL_LOHI of type \p VT into operations on the
/// half-width type \p HiLoVT, appending the result pieces (low half first) to
/// \p Result. LL/LH/RL/RH optionally supply pre-split operand halves; either
/// all four are provided or none are (asserted below). Returns false when the
/// target lacks the required half-width multiply support.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Determine which half-width multiply-high forms the target offers (or
  // assume all when Kind == Always).
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a half-width L*R producing both halves, using whichever of
  // [SU]MUL_LOHI or MUL+MULH[SU] the target supports. Returns false if
  // neither form is available for the requested signedness.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Split the operands into halves if the caller didn't provide them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // LOHI callers expect four pieces; the top two are known zero here.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  // Derive the high halves by shifting and truncating if not provided.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Low partial product: LL * RL.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Plain MUL only needs the low VT-wide result: fold the two cross
    // products into the high half and stop.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  // Accumulate the cross products into a full-width running sum.
  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Prefer the glued ADDC/ADDE pair when legal; otherwise use UADDO_CARRY
  // with an explicit boolean carry.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // Top partial product LH * RH; signed for SMUL_LOHI.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Propagate the carry from the middle addition into the top piece.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Signed fixup: when an operand's high half is negative, the unsigned
    // cross product over-counts by the other operand's low half in the top
    // bits, so conditionally subtract it back out.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
7744 
7745 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7746                                SelectionDAG &DAG, MulExpansionKind Kind,
7747                                SDValue LL, SDValue LH, SDValue RL,
7748                                SDValue RH) const {
7749   SmallVector<SDValue, 2> Result;
7750   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7751                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7752                            DAG, Kind, LL, LH, RL, RH);
7753   if (Ok) {
7754     assert(Result.size() == 2);
7755     Lo = Result[0];
7756     Hi = Result[1];
7757   }
7758   return Ok;
7759 }
7760 
7761 // Optimize unsigned division or remainder by constants for types twice as large
7762 // as a legal VT.
7763 //
7764 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7765 // can be computed
7766 // as:
7767 //   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7768 //   Remainder = Sum % Constant
7769 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7770 //
7771 // For division, we can compute the remainder using the algorithm described
7772 // above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
7774 // (1 << (BitWidth / 2)) to get the quotient.
7775 
7776 // If Constant is even, we can shift right the dividend and the divisor by the
7777 // number of trailing zeros in Constant before applying the remainder algorithm.
7778 // If we're after the quotient, we can subtract this value from the shifted
7779 // dividend and multiply by the multiplicative inverse of the shifted divisor.
7780 // If we want the remainder, we shift the value left by the number of trailing
7781 // zeros and add the bits that were shifted out of the dividend.
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                            SmallVectorImpl<SDValue> &Result,
                                            EVT HiLoVT, SelectionDAG &DAG,
                                            SDValue LL, SDValue LH) const {
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);

  // TODO: Support signed division/remainder.
  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
    return false;
  assert(
      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
      "Unexpected opcode");

  // Only a constant divisor can be expanded this way.
  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CN)
    return false;

  APInt Divisor = CN->getAPIntValue();
  unsigned BitWidth = Divisor.getBitWidth();
  unsigned HBitWidth = BitWidth / 2;
  assert(VT.getScalarSizeInBits() == BitWidth &&
         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");

  // Divisor needs to be less than (1 << HBitWidth).
  APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
  if (Divisor.uge(HalfMaxPlus1))
    return false;

  // We depend on the UREM by constant optimization in DAGCombiner that requires
  // high multiply.
  if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
      !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
    return false;

  // Don't expand if optimizing for size.
  if (DAG.shouldOptForSize())
    return false;

  // Early out for 0 or 1 divisors.
  if (Divisor.ule(1))
    return false;

  // If the divisor is even, shift it until it becomes odd.
  unsigned TrailingZeros = 0;
  if (!Divisor[0]) {
    TrailingZeros = Divisor.countr_zero();
    Divisor.lshrInPlace(TrailingZeros);
  }

  SDLoc dl(N);
  SDValue Sum;
  // Bits shifted out of the dividend; added back into the remainder later.
  SDValue PartialRem;

  // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
  // then add in the carry.
  // TODO: If we can't split it in half, we might be able to split into 3 or
  // more pieces using a smaller bit width.
  if (HalfMaxPlus1.urem(Divisor).isOne()) {
    assert(!LL == !LH && "Expected both input halves or no input halves!");
    if (!LL)
      std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);

    // Shift the input by the number of TrailingZeros in the divisor. The
    // shifted out bits will be added to the remainder later.
    if (TrailingZeros) {
      // Save the shifted off bits if we need the remainder.
      if (Opcode != ISD::UDIV) {
        APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
        PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
                                 DAG.getConstant(Mask, dl, HiLoVT));
      }

      // Funnel-shift the pair right: the low half receives bits shifted
      // down from the high half.
      LL = DAG.getNode(
          ISD::OR, dl, HiLoVT,
          DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
                      DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
          DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
                      DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
                                                 HiLoVT, dl)));
      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
    }

    // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
    EVT SetCCType =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
    if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
      // Add the halves, then fold the carry of that add back into the sum.
      SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
      Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
      Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
                        DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
    } else {
      // No carry op: detect overflow with an unsigned compare (Sum < LL).
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
      SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
      // If the boolean for the target is 0 or 1, we can add the setcc result
      // directly.
      if (getBooleanContents(HiLoVT) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
        Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
      else
        Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
                              DAG.getConstant(0, dl, HiLoVT));
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
    }
  }

  // If we didn't find a sum, we can't do the expansion.
  if (!Sum)
    return false;

  // Perform a HiLoVT urem on the Sum using truncated divisor.
  SDValue RemL =
      DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
                  DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
  SDValue RemH = DAG.getConstant(0, dl, HiLoVT);

  if (Opcode != ISD::UREM) {
    // Subtract the remainder from the shifted dividend.
    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);

    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);

    // Multiply by the multiplicative inverse of the divisor modulo
    // (1 << BitWidth).
    APInt MulFactor = Divisor.multiplicativeInverse();

    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
                                   DAG.getConstant(MulFactor, dl, VT));

    // Split the quotient into low and high parts.
    SDValue QuotL, QuotH;
    std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
    Result.push_back(QuotL);
    Result.push_back(QuotH);
  }

  if (Opcode != ISD::UDIV) {
    // If we shifted the input, shift the remainder left and add the bits we
    // shifted off the input.
    if (TrailingZeros) {
      APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
      RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
    }
    Result.push_back(RemL);
    Result.push_back(DAG.getConstant(0, dl, HiLoVT));
  }

  return true;
}
7935 
7936 // Check that (every element of) Z is undef or not an exact multiple of BW.
7937 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7938   return ISD::matchUnaryPredicate(
7939       Z,
7940       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7941       true);
7942 }
7943 
// Expand ISD::VP_FSHL/ISD::VP_FSHR into VP shifts, masks and an OR, threading
// the mask and explicit-vector-length operands through every node created.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;        // Shifted contributions of X and Y.
  SDValue ShAmt, InvShAmt; // Effective shift amount and its complement.
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2); // Shift amount, interpreted modulo BW.
  SDValue Mask = Node->getOperand(3);
  SDValue VL = Node->getOperand(4);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // The amount is known to be nonzero mod BW, so BW - C is a valid
    // in-range shift amount and the simple two-shift form works.
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // Z % BW may be zero, in which case BW - (Z % BW) would be an
    // out-of-range shift; pre-shift by one so the remaining amount
    // (BW - 1 - (Z % BW)) always fits in [0, BW - 1].
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      // Non-power-of-two width: compute the modulus explicitly.
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
8000 
// Expand FSHL/FSHR into primitive shifts and an OR, or into the
// opposite-direction funnel shift when that opcode is better supported.
// Returns an empty SDValue when no expansion is emitted.
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  // VP funnel shifts carry mask/EVL operands and have a dedicated expansion.
  if (Node->isVPOpcode())
    return expandVPFunnelShift(Node, DAG);

  EVT VT = Node->getValueType(0);

  // For vectors, only expand when every primitive op of the expansion is
  // itself available for this type.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2); // Shift amount, interpreted modulo BW.

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
    } else {
      // Z may be a multiple of BW, where plain negation would be wrong.
      // Pre-shift by one so the reversed shift by ~Z composes correctly.
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;        // Shifted contributions of X and Y.
  SDValue ShAmt, InvShAmt; // Effective shift amount and its complement.
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // Z % BW may be zero; pre-shift by one so the remaining amount
    // (BW - 1 - (Z % BW)) is always a valid shift in [0, BW - 1].
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      // Non-power-of-two width: compute the modulus explicitly.
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
8088 
8089 // TODO: Merge with expandFunnelShift.
8090 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8091                                   SelectionDAG &DAG) const {
8092   EVT VT = Node->getValueType(0);
8093   unsigned EltSizeInBits = VT.getScalarSizeInBits();
8094   bool IsLeft = Node->getOpcode() == ISD::ROTL;
8095   SDValue Op0 = Node->getOperand(0);
8096   SDValue Op1 = Node->getOperand(1);
8097   SDLoc DL(SDValue(Node, 0));
8098 
8099   EVT ShVT = Op1.getValueType();
8100   SDValue Zero = DAG.getConstant(0, DL, ShVT);
8101 
8102   // If a rotate in the other direction is more supported, use it.
8103   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8104   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8105       isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8106     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8107     return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8108   }
8109 
8110   if (!AllowVectorOps && VT.isVector() &&
8111       (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8112        !isOperationLegalOrCustom(ISD::SRL, VT) ||
8113        !isOperationLegalOrCustom(ISD::SUB, VT) ||
8114        !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8115        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8116     return SDValue();
8117 
8118   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8119   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8120   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8121   SDValue ShVal;
8122   SDValue HsVal;
8123   if (isPowerOf2_32(EltSizeInBits)) {
8124     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8125     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8126     SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8127     SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8128     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8129     SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8130     HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8131   } else {
8132     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8133     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8134     SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8135     SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8136     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8137     SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8138     SDValue One = DAG.getConstant(1, DL, ShVT);
8139     HsVal =
8140         DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8141   }
8142   return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8143 }
8144 
// Expand SHL_PARTS/SRA_PARTS/SRL_PARTS into FSHL/FSHR plus selects that
// handle shift amounts of at least one part width. The two result halves are
// returned through Lo and Hi.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0); // Low part of the wide value.
  SDValue ShOpHi = Node->getOperand(1); // High part of the wide value.
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 fills the part that is shifted entirely out of range: the
  // sign-replication of the high part for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2 merges bits across the part boundary (correct for amounts below
  // VTBits); Tmp3 is the plain single-part shift.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  // Large amounts route the single-part shift into the far half and the
  // fill value into the near half.
  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8196 
// Expand FP_TO_SINT by decoding the IEEE-754 bit pattern with integer ops.
// Returns true and sets Result on success, false if this expansion does not
// apply (non-f32->i64, or strict FP).
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Strict nodes carry the chain in operand 0; the FP source follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8 exponent bits at bit 23,
  // bias 127, 23 mantissa bits.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw bits so the fields can be extracted.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent: ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign becomes all-ones for negative inputs, all-zeros otherwise
  // (arithmetic shift of the isolated sign bit), widened to DstVT.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Significand with the implicit leading one restored (bit 23).
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand: shift left when the exponent exceeds the mantissa
  // width, otherwise shift right by the deficit.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, which truncates to zero.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8267 
// Expand [STRICT_]FP_TO_UINT in terms of [STRICT_]FP_TO_SINT, offsetting
// around the signed/unsigned boundary when the unsigned range is reachable.
// Returns true and sets Result (and Chain for strict nodes) on success.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict nodes carry the chain in operand 0; the FP source follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst is the FP value of the destination's sign mask (2^(DstBits-1)).
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  // Sel is true when Src fits in the signed range as-is.
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Signaling compare so a signaling NaN still raises the invalid flag.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Chain the FSUB and FP_TO_SINT so their exception behavior is ordered.
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8369 
8370 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8371                                       SDValue &Chain, SelectionDAG &DAG) const {
8372   // This transform is not correct for converting 0 when rounding mode is set
8373   // to round toward negative infinity which will produce -0.0. So disable
8374   // under strictfp.
8375   if (Node->isStrictFPOpcode())
8376     return false;
8377 
8378   SDValue Src = Node->getOperand(0);
8379   EVT SrcVT = Src.getValueType();
8380   EVT DstVT = Node->getValueType(0);
8381 
8382   // If the input is known to be non-negative and SINT_TO_FP is legal then use
8383   // it.
8384   if (Node->getFlags().hasNonNeg() &&
8385       isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
8386     Result =
8387         DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8388     return true;
8389   }
8390 
8391   if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8392     return false;
8393 
8394   // Only expand vector types if we have the appropriate vector bit
8395   // operations.
8396   if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8397                            !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8398                            !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8399                            !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8400                            !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8401     return false;
8402 
8403   SDLoc dl(SDValue(Node, 0));
8404   EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8405 
8406   // Implementation of unsigned i64 to f64 following the algorithm in
8407   // __floatundidf in compiler_rt.  This implementation performs rounding
8408   // correctly in all rounding modes with the exception of converting 0
8409   // when rounding toward negative infinity. In that case the fsub will
8410   // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8411   // incorrect.
8412   SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8413   SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8414       llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8415   SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8416   SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8417   SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8418 
8419   SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8420   SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8421   SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8422   SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8423   SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8424   SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8425   SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8426   Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8427   return true;
8428 }
8429 
8430 SDValue
8431 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8432                                                SelectionDAG &DAG) const {
8433   unsigned Opcode = Node->getOpcode();
8434   assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8435           Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8436          "Wrong opcode");
8437 
8438   if (Node->getFlags().hasNoNaNs()) {
8439     ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8440     EVT VT = Node->getValueType(0);
8441     if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8442          !isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
8443         VT.isVector())
8444       return SDValue();
8445     SDValue Op1 = Node->getOperand(0);
8446     SDValue Op2 = Node->getOperand(1);
8447     SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8448     // Copy FMF flags, but always set the no-signed-zeros flag
8449     // as this is implied by the FMINNUM/FMAXNUM semantics.
8450     SDNodeFlags Flags = Node->getFlags();
8451     Flags.setNoSignedZeros(true);
8452     SelCC->setFlags(Flags);
8453     return SelCC;
8454   }
8455 
8456   return SDValue();
8457 }
8458 
// Expand FMINNUM/FMAXNUM via the best available alternative: the _IEEE
// variants (with sNaN quieting), FMINIMUM/FMAXIMUM when NaNs/signed zeros
// cannot matter, or a compare+select. Returns an empty SDValue if none apply.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(0);
  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT))
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
  }

  // Last resort: compare+select (only emitted under the no-NaNs guarantee).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8514 
// Expand FMINIMUM/FMAXIMUM — min/max that propagates NaNs and orders -0.0
// below +0.0 — from the best available min/max primitive plus explicit
// NaN-propagation and signed-zero fix-ups where required.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
  } else {
    // No usable min/max and no vector select: scalarize instead.
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
                                        APFloat::getNaN(VT.getFltSemantics()));
    // SETUO (unordered) is true iff at least one operand is NaN.
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
    // When the tentative result compares equal to zero, pick the operand
    // whose zero carries the winning sign (+0 for max, -0 for min).
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
  }

  return MinMax;
}
8581 
/// Expand FMINIMUMNUM/FMAXIMUMNUM for \p Node. The expansion prefers
/// progressively weaker target operations (FMINNUM_IEEE/FMAXNUM_IEEE, then
/// FMINIMUM/FMAXIMUM, then FMINNUM/FMAXNUM) when their semantics are known to
/// match for the given operands, and otherwise falls back to an explicit
/// compare/select sequence that picks the non-NaN operand and fixes up the
/// -0.0 vs +0.0 ordering.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(0);
  // CCVT is the setcc result type used for the compare/select fallbacks.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = Node->getFlags();

  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  // Preferred lowering: the IEEE-754 2008 minNum/maxNum ops, which already
  // return the non-NaN operand for a quiet NaN input.
  if (isOperationLegalOrCustom(NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(LHS)) {
        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(RHS)) {
        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
      }
    }

    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
  // same behaviors for all of other cases: +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2019Op, VT))
      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
  }

  // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
       DAG.isKnownNeverZeroFloat(RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(IEEE2008Op, VT))
      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
  }

  // The compare/select fallback below needs VSELECT for vectors; otherwise
  // scalarize.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // If only one operand is NaN, override it with another operand.
  // (SETUO is true iff the operand is unordered with itself, i.e. NaN.)
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
  }

  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
  // If MinMax is NaN, let's quiet it.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
      !DAG.isKnownNeverNaN(RHS)) {
    MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
  }

  // Fixup signed zero behavior.
  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
    return MinMax;
  }
  // When the naive compare/select produced a zero, both inputs were zeros of
  // possibly different signs; explicitly prefer +0.0 for max and -0.0 for min
  // by testing each operand's class with IS_FPCLASS.
  SDValue TestZero =
      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
  SDValue LCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
      MinMax, Flags);
  SDValue RCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
      Flags);
  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
}
8669 
8670 /// Returns a true value if if this FPClassTest can be performed with an ordered
8671 /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8672 /// std::nullopt if it cannot be performed as a compare with 0.
8673 static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8674                                            const fltSemantics &Semantics,
8675                                            const MachineFunction &MF) {
8676   FPClassTest OrderedMask = Test & ~fcNan;
8677   FPClassTest NanTest = Test & fcNan;
8678   bool IsOrdered = NanTest == fcNone;
8679   bool IsUnordered = NanTest == fcNan;
8680 
8681   // Skip cases that are testing for only a qnan or snan.
8682   if (!IsOrdered && !IsUnordered)
8683     return std::nullopt;
8684 
8685   if (OrderedMask == fcZero &&
8686       MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8687     return IsOrdered;
8688   if (OrderedMask == (fcZero | fcSubnormal) &&
8689       MF.getDenormalMode(Semantics).inputsAreZero())
8690     return IsOrdered;
8691   return std::nullopt;
8692 }
8693 
/// Expand an IS_FPCLASS test of \p Op against the class mask \p OrigTestMask
/// into a boolean of type \p ResultVT. First tries cheap floating-point
/// compares (when FP exceptions may be ignored), then falls back to integer
/// bit tests on the bitcasted value.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x86 f80 has an explicit integer bit in the significand, handled specially
  // throughout.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(FPTestMask, true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                             : UnorderedCmpOpcode,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
        (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
         (OperandVT.isVector() &&
          isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
                                          : UnorderedCmpOpcode,
                                OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
          OperandVT);
      return DAG.getSetCC(DL, ResultVT, Op, Inf,
                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                  OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
                            IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(IsFiniteOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(IsNormalOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  // Used to invert boolean results (results are all-ones/zero per lane).
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // Accumulates the OR of all partial class checks into Res.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  // Lazily builds (and caches) the f80 explicit-integer-bit test.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No partial check matched anything left in Test: the answer is constant.
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
9041 
9042 // Only expand vector types if we have the appropriate vector bit operations.
9043 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9044   assert(VT.isVector() && "Expected vector type");
9045   unsigned Len = VT.getScalarSizeInBits();
9046   return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9047          TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9048          TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9049          (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9050          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9051 }
9052 
/// Expand CTPOP for \p Node using the parallel bit-counting ("bithack")
/// algorithm: pairwise sums of 1-, 2-, then 4-bit fields, followed by a
/// byte-sum via multiply (or shift/add chain) for types wider than 8 bits.
/// Returns an empty SDValue when the type cannot be expanded here.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // Byte-patterns splatted across the whole element width.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // For 8-bit elements each byte now holds its own popcount; we're done.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                     DAG.getNode(ISD::ADD, dl, VT, Op,
                                 DAG.getNode(ISD::SRL, dl, VT, Op,
                                             DAG.getConstant(8, dl, ShVT))),
                     DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    // No usable multiply: emulate the byte-sum with a log2(Len/8)-deep
    // shift/add chain.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  // The total popcount ends up in the top byte; shift it down.
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
9129 
9130 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9131   SDLoc dl(Node);
9132   EVT VT = Node->getValueType(0);
9133   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9134   SDValue Op = Node->getOperand(0);
9135   SDValue Mask = Node->getOperand(1);
9136   SDValue VL = Node->getOperand(2);
9137   unsigned Len = VT.getScalarSizeInBits();
9138   assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9139 
9140   // TODO: Add support for irregular type lengths.
9141   if (!(Len <= 128 && Len % 8 == 0))
9142     return SDValue();
9143 
9144   // This is same algorithm of expandCTPOP from
9145   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9146   SDValue Mask55 =
9147       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9148   SDValue Mask33 =
9149       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9150   SDValue Mask0F =
9151       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9152 
9153   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9154 
9155   // v = v - ((v >> 1) & 0x55555555...)
9156   Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9157                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9158                                  DAG.getConstant(1, dl, ShVT), Mask, VL),
9159                      Mask55, Mask, VL);
9160   Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9161 
9162   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9163   Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9164   Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9165                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9166                                  DAG.getConstant(2, dl, ShVT), Mask, VL),
9167                      Mask33, Mask, VL);
9168   Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9169 
9170   // v = (v + (v >> 4)) & 0x0F0F0F0F...
9171   Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9172                      Mask, VL),
9173   Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9174   Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9175 
9176   if (Len <= 8)
9177     return Op;
9178 
9179   // v = (v * 0x01010101...) >> (Len - 8)
9180   SDValue V;
9181   if (isOperationLegalOrCustomOrPromote(
9182           ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9183     SDValue Mask01 =
9184         DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9185     V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9186   } else {
9187     V = Op;
9188     for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9189       SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9190       V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9191                       DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9192                       Mask, VL);
9193     }
9194   }
9195   return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9196                      Mask, VL);
9197 }
9198 
/// Expand CTLZ/CTLZ_ZERO_UNDEF for \p Node. Prefers a legal CTLZ or
/// CTLZ_ZERO_UNDEF on the target; otherwise smears the leading set bit
/// rightward and counts the zero bits via CTPOP of the complement.
/// Returns an empty SDValue when a vector type cannot be expanded here.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  // (CTLZ of 0 is defined to be the bit width; select that explicitly.)
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
9248 
9249 SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9250   SDLoc dl(Node);
9251   EVT VT = Node->getValueType(0);
9252   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9253   SDValue Op = Node->getOperand(0);
9254   SDValue Mask = Node->getOperand(1);
9255   SDValue VL = Node->getOperand(2);
9256   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9257 
9258   // do this:
9259   // x = x | (x >> 1);
9260   // x = x | (x >> 2);
9261   // ...
9262   // x = x | (x >>16);
9263   // x = x | (x >>32); // for 64-bit input
9264   // return popcount(~x);
9265   for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9266     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9267     Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9268                      DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9269                      VL);
9270   }
9271   Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9272                    Mask, VL);
9273   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9274 }
9275 
/// Lower CTTZ/CTTZ_ZERO_UNDEF via a de Bruijn multiply and a constant-pool
/// table lookup: ((x & -x) * DeBruijn) >> (BitWidth - log2(BitWidth)) yields
/// a unique index per trailing-zero count, which indexes a precomputed byte
/// table. Only 32- and 64-bit widths are supported; otherwise returns an
/// empty SDValue.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  // De Bruijn sequences whose top log2(BitWidth) bits after the multiply are
  // distinct for every isolated low bit position.
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // Op & -Op isolates the lowest set bit.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse table: Table[(DeBruijn << i) >> ShiftAmt] == i.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  // ZERO_UNDEF may return anything for 0, so the raw load suffices.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // CTTZ of 0 is defined to be the bit width; select that explicitly.
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9319 
/// Expand CTTZ/CTTZ_ZERO_UNDEF into operations the target supports, trying
/// strategies in decreasing order of preference. Returns SDValue() when a
/// vector type lacks the operations the bit-trick fallback would need.
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // CTTZ of zero is defined as the bit width.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
      !isOperationLegal(ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) has exactly the trailing-zero bits of x set (all-ones when
  // x == 0, which counts as NumBitsPerElt trailing zeros).
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
9375 
9376 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9377   SDValue Op = Node->getOperand(0);
9378   SDValue Mask = Node->getOperand(1);
9379   SDValue VL = Node->getOperand(2);
9380   SDLoc dl(Node);
9381   EVT VT = Node->getValueType(0);
9382 
9383   // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9384   SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9385                             DAG.getAllOnesConstant(dl, VT), Mask, VL);
9386   SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9387                                  DAG.getConstant(1, dl, VT), Mask, VL);
9388   SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9389   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9390 }
9391 
/// Expand a VP "count trailing zero elements" node: the result is the index
/// of the first source element that compares non-zero, or EVL when no such
/// element exists within the explicit vector length.
SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
                                             SelectionDAG &DAG) const {
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
  SDLoc DL(N);
  SDValue Source = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(0);
  // One result-typed element per source element.
  EVT ResVecVT =
      EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());

  // Convert to boolean vector.
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
    SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                             SrcVT.getVectorElementCount());
    Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
                         DAG.getCondCode(ISD::SETNE), Mask, EVL);
  }

  // True lanes keep their own index (from the step vector); false lanes get
  // EVL, which can never win the umin reduction against an in-bounds index.
  SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
  SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
  SDValue Select =
      DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
  return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
}
9424 
/// Expand ISD::ABS (or 0 - abs(x) when \p IsNegative is set) into operations
/// the target supports, preferring min/max forms over the shift-based trick.
/// Returns SDValue() when a vector type lacks the operations the fallback
/// needs.
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                  bool IsNegative) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  // abs(x) -> smax(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMAX, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    // Op is used twice; freeze it so both uses observe the same value even if
    // Op is undef/poison.
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::SMAX, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // abs(x) -> umin(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::UMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::UMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // 0 - abs(x) -> smin(x, sub(0,x))
  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::SMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SRA, VT) ||
       (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
       (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
       !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Shift = x >> (bitwidth - 1) is all-ones for negative x and zero otherwise,
  // so xor-with-Shift conditionally complements and the sub completes the
  // conditional negation.
  Op = DAG.getFreeze(Op);
  SDValue Shift = DAG.getNode(
      ISD::SRA, dl, VT, Op,
      DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);

  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
  if (!IsNegative)
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);

  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
  return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
9479 
/// Expand ISD::ABDS/ABDU (signed/unsigned absolute difference) using
/// whichever of min/max, saturating subtract, overflow-flag, branchless
/// compare-mask, or compare+select forms the target supports, in that order
/// of preference.
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // Both operands are used multiple times below; freeze them so every use
  // observes the same value even if an operand is undef/poison.
  SDValue LHS = DAG.getFreeze(N->getOperand(0));
  SDValue RHS = DAG.getFreeze(N->getOperand(1));
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
    return DAG.getNode(ISD::OR, dl, VT,
                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));

  // If the subtract doesn't overflow then just use abs(sub())
  // NOTE: don't use frozen operands for value tracking.
  bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
                       DAG.SignBitIsZero(N->getOperand(0));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
                             N->getOperand(1)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
                             N->getOperand(0)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));

  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
  }

  // Similar to the branchless expansion, use the (sign-extended) usubo overflow
  // flag if the (scalar) type is illegal as this is more likely to legalize
  // cleanly:
  // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue USubO =
        DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
    SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
}
9553 
/// Expand the AVG* family (AVGFLOORS/AVGCEILS/AVGFLOORU/AVGCEILU): compute
/// the floor- or ceiling-rounded average of two integers without overflowing
/// the element type.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // (>= 2 sign bits / >= 1 leading zero means the plain add cannot overflow.)
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
       DAG.ComputeNumSignBits(RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
    if (!IsFloor)
      // Ceiling rounding: add 1 before the halving shift.
      Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
    return DAG.getNode(ShiftOpc, dl, VT, Sum,
                       DAG.getShiftAmountConstant(1, VT, dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
    if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
      if (!IsFloor)
        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
                          DAG.getConstant(1, dl, ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
                        DAG.getShiftAmountConstant(1, ExtVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  // The carry out of the add becomes the top bit of the halved sum.
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue UAddWithOverflow =
        DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(0);
    SDValue Overflow = UAddWithOverflow.getValue(1);

    // Right shift the sum by 1
    SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
                                  DAG.getShiftAmountConstant(1, VT, dl));

    SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
    SDValue OverflowShl = DAG.getNode(
        ISD::SHL, dl, VT, ZeroExtOverflow,
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));

    return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Both operands are used twice; freeze so all uses see the same value.
  LHS = DAG.getFreeze(LHS);
  RHS = DAG.getFreeze(RHS);
  SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
  SDValue Shift =
      DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
  return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
}
9635 
/// Expand ISD::BSWAP (reverse byte order) with shifts, masks and ors.
/// Only simple types with i16/i32/i64 scalar elements are handled; anything
/// else returns SDValue().
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // Move each of the four bytes into place (TmpN holds the byte destined
    // for byte position N-1) and or them together.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same byte-permutation idea with eight bytes; ors are balanced as a tree.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
9693 
/// VP (vector-predicated) version of expandBSWAP: the same shift/mask/or
/// sequences with the Mask and EVL operands threaded through every node.
/// Note the i16 case uses shl/srl/or rather than expandBSWAP's rotate.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    // Move each of the four bytes into place and or them together.
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL)
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    // Same byte-permutation idea with eight bytes; ors are balanced as a tree.
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
9769 
/// Expand ISD::BITREVERSE. For power-of-2 element sizes of at least 8 bits,
/// bswap the bytes and then swap the nibble, 2-bit and 1-bit groups within
/// each byte; otherwise fall back to moving every bit to its mirrored
/// position one at a time.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Fallback for awkward sizes: shift bit I to position J = Sz-1-I, mask it
  // out, and or the results together. O(Sz) nodes.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    APInt Shift = APInt::getOneBitSet(Sz, J);
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
9830 
/// VP (vector-predicated) version of expandBITREVERSE: bswap then swap the
/// nibble/2-bit/1-bit groups, threading Mask and EVL through every node.
/// Unlike expandBITREVERSE there is no per-bit fallback, so non-power-of-2
/// or sub-byte element sizes return SDValue().
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    return Tmp;
  }
  return SDValue();
}
9892 
// Break a fixed-width vector load into element-sized pieces. Returns the
// pair {result value, output chain}. For byte-sized elements this emits one
// scalar (ext)load per element; for non-byte-sized elements it emits a single
// wide integer load and carves the elements out with shifts and masks.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();    // Vector type as it exists in memory.
  EVT DstVT = LD->getValueType(0);  // Vector type produced in registers.
  ISD::LoadExtType ExtType = LD->getExtensionType();

  // Only fixed-width vectors can be decomposed element by element here.
  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    // Integer type covering the vector's store size (may include padding
    // bits beyond the value bits).
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    // Integer type covering only the value bits of the vector.
    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    // Mask selecting the low SrcEltBits of a LoadVT value.
    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 lives in the most significant bits
      // of the packed integer, so mirror the shift index.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
      // Shift the element down to bit 0, mask off neighbors, and truncate
      // to the in-memory element type.
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Re-apply the original load's extension kind per element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one scalar (ext)load per element at increasing
  // byte offsets.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Join the per-element chains; the loads may execute in any order.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
9982 
// Break a fixed-width vector store into element-sized pieces, returning the
// new chain. Byte-sized elements become one truncating store per element;
// non-byte-sized elements are packed into a single integer which is stored
// in one go.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // Only fixed-width vectors can be decomposed element by element here.
  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    // Pack every element into one integer the size of the whole vector.
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // Extract, truncate to the memory element width, and widen back to the
      // packed integer type so it can be shifted into position.
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // Big-endian targets place element 0 in the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // One store of the fully-assembled integer.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The element stores are independent; join their chains.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
10059 
// Expand an unaligned load that the target cannot perform directly. Returns
// the pair {result value, output chain}. Three strategies, in order of
// preference:
//   1. FP/vector with a legal same-sized integer type: do an integer load
//      and bitcast (or scalarize if even the integer load isn't legal).
//   2. FP/vector otherwise: copy to an aligned stack slot piecewise with
//      integer loads/stores, then reload with the original load's semantics.
//   3. Scalar integer: split into two half-width loads and recombine with
//      shift+or.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized chunks needed to cover the value (round up).
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low part is always zero-extended so
  // the OR below doesn't pollute the high half; the high part keeps the
  // original extension kind.  Endianness decides which half sits at the
  // lower address.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: result = (Hi << NumBits) | Lo
  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
10209 
// Expand an unaligned store that the target cannot perform directly,
// returning the new chain. Mirrors expandUnalignedLoad:
//   1. FP/vector with a legal same-sized integer type: bitcast and do an
//      integer store (or scalarize if the integer store isn't legal).
//   2. FP/vector otherwise: store to an aligned stack slot, then copy out
//      to the destination piecewise with integer loads/stores.
//   3. Scalar integer: split the value and emit two half-width stores.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized chunks needed to cover the value (round up).
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts; endianness decides which half goes at the lower
  // address.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
10342 
10343 SDValue
10344 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10345                                        const SDLoc &DL, EVT DataVT,
10346                                        SelectionDAG &DAG,
10347                                        bool IsCompressedMemory) const {
10348   SDValue Increment;
10349   EVT AddrVT = Addr.getValueType();
10350   EVT MaskVT = Mask.getValueType();
10351   assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10352          "Incompatible types of Data and Mask");
10353   if (IsCompressedMemory) {
10354     if (DataVT.isScalableVector())
10355       report_fatal_error(
10356           "Cannot currently handle compressed memory with scalable vectors");
10357     // Incrementing the pointer according to number of '1's in the mask.
10358     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10359     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10360     if (MaskIntVT.getSizeInBits() < 32) {
10361       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10362       MaskIntVT = MVT::i32;
10363     }
10364 
10365     // Count '1's with POPCNT.
10366     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10367     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10368     // Scale is an element size in bytes.
10369     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10370                                     AddrVT);
10371     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10372   } else if (DataVT.isScalableVector()) {
10373     Increment = DAG.getVScale(DL, AddrVT,
10374                               APInt(AddrVT.getFixedSizeInBits(),
10375                                     DataVT.getStoreSize().getKnownMinValue()));
10376   } else
10377     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10378 
10379   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10380 }
10381 
// Clamp Idx so that inserting/extracting a subvector of SubEC elements at
// that position stays within a vector of type VecVT.  Returns the (possibly
// unchanged) clamped index.
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    // Otherwise clamp to (vscale * NElts) - NumSubElts, the last position
    // where the whole subvector still fits.
    SDValue VS =
        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
    // When NumSubElts <= NElts the subtraction cannot go below zero (vscale
    // is at least 1), so a plain SUB suffices; otherwise saturate at zero.
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
                              DAG.getConstant(NumSubElts, dl, IdxVT));
    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
  }
  // Single-element access into a power-of-two-sized vector: a bitmask is
  // cheaper than a UMIN.
  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }
  // General fixed-width case: clamp to the last in-bounds start position.
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(MaxIndex, dl, IdxVT));
}
10415 
10416 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10417                                                 SDValue VecPtr, EVT VecVT,
10418                                                 SDValue Index) const {
10419   return getVectorSubVecPointer(
10420       DAG, VecPtr, VecVT,
10421       EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10422       Index);
10423 }
10424 
// Compute the address of the subvector starting at element Index within the
// vector at VecPtr, clamping Index so the whole subvector stays in bounds.
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  // For a scalable subvector the index counts vscale-sized groups, so scale
  // it by vscale (the APInt multiplier here is 1, i.e. Index * vscale).
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  // Convert the element index into a byte offset and add it to the base.
  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
10454 
10455 //===----------------------------------------------------------------------===//
10456 // Implementation of Emulated TLS Model
10457 //===----------------------------------------------------------------------===//
10458 
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  // Look through casts/aliases to the underlying global, then find its
  // matching "__emutls_v.<name>" control variable in the same module.
  const GlobalValue *GV =
      cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  // The control variable's address is the single argument to the call.
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Lower the runtime call; its return value is the TLS variable's address.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10498 
10499 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10500                                                 SelectionDAG &DAG) const {
10501   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10502   if (!isCtlzFast())
10503     return SDValue();
10504   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10505   SDLoc dl(Op);
10506   if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10507     EVT VT = Op.getOperand(0).getValueType();
10508     SDValue Zext = Op.getOperand(0);
10509     if (VT.bitsLT(MVT::i32)) {
10510       VT = MVT::i32;
10511       Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10512     }
10513     unsigned Log2b = Log2_32(VT.getSizeInBits());
10514     SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10515     SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10516                               DAG.getConstant(Log2b, dl, MVT::i32));
10517     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10518   }
10519   return SDValue();
10520 }
10521 
// Expand SMIN/SMAX/UMIN/UMAX.  Tries cheap arithmetic identities first, then
// falls back to the generic setcc+select form, preferring a condition code
// for which a SETCC node already exists in the DAG so it can be reused.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    // Freeze Op0 since it is used twice (in the SUB and the SETCC).
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    // First try the direct forms: select(setcc(Op0,Op1,CC), Op0, Op1).
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    // Then the commuted forms, which swap the select arms instead.
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    // No reusable SETCC found; build one with the preferred condition.
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10602 
/// Expand a saturating add/sub node ([US]{ADD,SUB}SAT).
/// First tries cheap min/max based rewrites when those ops are legal,
/// otherwise lowers to the matching overflow opcode ([US]{ADD,SUB}O) followed
/// by code that clamps the result when overflow was detected.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode onto its overflow-reporting counterpart, which
  // the generic expansion below is built on.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.")
;
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  // Wrapping result in value 0, overflow flag in value 1.
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // With 0/-1 booleans the sign-extended overflow flag is an all-ones
      // mask exactly when we overflowed, so an OR clamps to UINT_MAX:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Dual of the UADDSAT trick: AND with the inverted mask clamps to 0.
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      // Can only saturate towards SIGNED_MAX.
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      // Can only saturate towards SIGNED_MIN.
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // Generic signed case: the saturation value is SIGNED_MIN when the wrapped
  // result is non-negative (we overflowed downwards) and SIGNED_MAX when it
  // is negative, computed branchlessly as (SumDiff >>s BW-1) ^ SIGNED_MIN.
  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10718 
10719 SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10720   unsigned Opcode = Node->getOpcode();
10721   SDValue LHS = Node->getOperand(0);
10722   SDValue RHS = Node->getOperand(1);
10723   EVT VT = LHS.getValueType();
10724   EVT ResVT = Node->getValueType(0);
10725   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10726   SDLoc dl(Node);
10727 
10728   auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10729   auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10730   SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10731   SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10732 
10733   // We can't perform arithmetic on i1 values. Extending them would
10734   // probably result in worse codegen, so let's just use two selects instead.
10735   // Some targets are also just better off using selects rather than subtraction
10736   // because one of the conditions can be merged with one of the selects.
10737   // And finally, if we don't know the contents of high bits of a boolean value
10738   // we can't perform any arithmetic either.
10739   if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10740       getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10741     SDValue SelectZeroOrOne =
10742         DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10743                       DAG.getConstant(0, dl, ResVT));
10744     return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10745                          SelectZeroOrOne);
10746   }
10747 
10748   if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10749     std::swap(IsGT, IsLT);
10750   return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10751                             ResVT);
10752 }
10753 
10754 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10755   unsigned Opcode = Node->getOpcode();
10756   bool IsSigned = Opcode == ISD::SSHLSAT;
10757   SDValue LHS = Node->getOperand(0);
10758   SDValue RHS = Node->getOperand(1);
10759   EVT VT = LHS.getValueType();
10760   SDLoc dl(Node);
10761 
10762   assert((Node->getOpcode() == ISD::SSHLSAT ||
10763           Node->getOpcode() == ISD::USHLSAT) &&
10764           "Expected a SHLSAT opcode");
10765   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10766   assert(VT.isInteger() && "Expected operands to be integers");
10767 
10768   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10769     return DAG.UnrollVectorOp(Node);
10770 
10771   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10772 
10773   unsigned BW = VT.getScalarSizeInBits();
10774   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10775   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10776   SDValue Orig =
10777       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10778 
10779   SDValue SatVal;
10780   if (IsSigned) {
10781     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10782     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10783     SDValue Cond =
10784         DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10785     SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10786   } else {
10787     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10788   }
10789   SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10790   return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10791 }
10792 
/// Expand a multiply that produces a double-width (WideVT) result from the
/// split halves of its operands (LL/LH and RL/RH), writing the low and high
/// halves of the product to \p Lo and \p Hi. Uses a MUL libcall of the wide
/// type when one is available, otherwise falls back to schoolbook multiword
/// multiplication built from half-width partial products.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, EVT WideVT,
                                        const SDValue LL, const SDValue LH,
                                        const SDValue RL, const SDValue RH,
                                        SDValue &Lo, SDValue &Hi) const {
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  // Also, we can fall back to a division in some cases, but that's a big
  // performance hit in the general case.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    // We'll expand the multiplication by brute force because we have no other
    // options. This is a trivially-generalized version of the code from
    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
    // 4.3.1).
    EVT VT = LL.getValueType();
    unsigned Bits = VT.getSizeInBits();
    unsigned HalfBits = Bits >> 1;
    // Mask selecting the low half of a VT value.
    SDValue Mask =
        DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
    SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
    SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);

    // T = low(LL) * low(RL): the first partial product; its halves feed the
    // running sums below.
    SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
    SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);

    SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
    SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
    SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
    SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);

    // U accumulates high(LL)*low(RL) plus the carry (high half) of T.
    SDValue U = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
    SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
    SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);

    // V accumulates low(LL)*high(RL) plus the low half of U.
    SDValue V = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
    SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);

    // W = high(LL)*high(RL) plus the carries from U and V: the high word of
    // the LL*RL product.
    SDValue W =
        DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
                    DAG.getNode(ISD::ADD, dl, VT, UH, VH));
    Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
                     DAG.getNode(ISD::SHL, dl, VT, V, Shift));

    // Cross terms RH*LL and RL*LH only affect the high half of the result.
    Hi = DAG.getNode(ISD::ADD, dl, VT, W,
                     DAG.getNode(ISD::ADD, dl, VT,
                                 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
                                 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
  } else {
    // Attempt a libcall.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(Signed);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = {LL, LH, RL, RH};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = {LH, LL, RH, RL};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      Lo = Ret.getOperand(0);
      Hi = Ret.getOperand(1);
    } else {
      Lo = Ret.getOperand(1);
      Hi = Ret.getOperand(0);
    }
  }
}
10881 
10882 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10883                                         bool Signed, const SDValue LHS,
10884                                         const SDValue RHS, SDValue &Lo,
10885                                         SDValue &Hi) const {
10886   EVT VT = LHS.getValueType();
10887   assert(RHS.getValueType() == VT && "Mismatching operand types");
10888 
10889   SDValue HiLHS;
10890   SDValue HiRHS;
10891   if (Signed) {
10892     // The high part is obtained by SRA'ing all but one of the bits of low
10893     // part.
10894     unsigned LoSize = VT.getFixedSizeInBits();
10895     HiLHS = DAG.getNode(
10896         ISD::SRA, dl, VT, LHS,
10897         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10898     HiRHS = DAG.getNode(
10899         ISD::SRA, dl, VT, RHS,
10900         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10901   } else {
10902     HiLHS = DAG.getConstant(0, dl, VT);
10903     HiRHS = DAG.getConstant(0, dl, VT);
10904   }
10905   EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10906   forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10907 }
10908 
/// Expand a fixed point multiplication ([US]MULFIX[SAT]): compute the
/// double-width product of the operands, shift it right by Scale, and for the
/// saturating variants clamp when the discarded high bits show overflow.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  // Scale is the number of fractional bits; operand 2 is a constant.
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // smul.fix.sat(a, b, 0) via SMULO: on overflow pick SIGNED_MIN or
      // SIGNED_MAX from the sign of the (infinitely precise) product.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // umul.fix.sat(a, b, 0) via UMULO: clamp to UINT_MAX on overflow.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
  // Prefer, in order: a single LOHI node, a MUL + MULH pair, a widened MUL,
  // and finally the brute-force/libcall expansion (scalar only).
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    // No way to form the wide product for vectors here; give up and let the
    // caller handle it.
    return SDValue();
  } else {
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits the result is entirely in Lo; overflow happened
    // iff Hi is not the sign-extension of Lo.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
11065 
/// Expand a fixed point division ([US]DIVFIX[SAT]) without widening: shift
/// the LHS up and/or the RHS down by a total of Scale bits (using available
/// "headroom" bits in each operand), then emit a regular division rounded
/// towards negative infinity. Returns an empty SDValue when there is not
/// enough headroom to do this in the current type.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Consume LHS headroom first, and put the remainder of the scale on RHS.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    // Quotient is negative exactly when the operand signs differ; in that
    // case a nonzero remainder means SDIV truncated upwards, so fix up by
    // subtracting one.
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
11151 
11152 void TargetLowering::expandUADDSUBO(
11153     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11154   SDLoc dl(Node);
11155   SDValue LHS = Node->getOperand(0);
11156   SDValue RHS = Node->getOperand(1);
11157   bool IsAdd = Node->getOpcode() == ISD::UADDO;
11158 
11159   // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11160   unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11161   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11162     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11163     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11164                                     { LHS, RHS, CarryIn });
11165     Result = SDValue(NodeCarry.getNode(), 0);
11166     Overflow = SDValue(NodeCarry.getNode(), 1);
11167     return;
11168   }
11169 
11170   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11171                             LHS.getValueType(), LHS, RHS);
11172 
11173   EVT ResultType = Node->getValueType(1);
11174   EVT SetCCType = getSetCCResultType(
11175       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11176   SDValue SetCC;
11177   if (IsAdd && isOneConstant(RHS)) {
11178     // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11179     // the live range of X. We assume comparing with 0 is cheap.
11180     // The general case (X + C) < C is not necessarily beneficial. Although we
11181     // reduce the live range of X, we may introduce the materialization of
11182     // constant C.
11183     SetCC =
11184         DAG.getSetCC(dl, SetCCType, Result,
11185                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11186   } else if (IsAdd && isAllOnesConstant(RHS)) {
11187     // Special case: uaddo X, -1 overflows if X != 0.
11188     SetCC =
11189         DAG.getSetCC(dl, SetCCType, LHS,
11190                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11191   } else {
11192     ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11193     SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11194   }
11195   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11196 }
11197 
11198 void TargetLowering::expandSADDSUBO(
11199     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11200   SDLoc dl(Node);
11201   SDValue LHS = Node->getOperand(0);
11202   SDValue RHS = Node->getOperand(1);
11203   bool IsAdd = Node->getOpcode() == ISD::SADDO;
11204 
11205   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11206                             LHS.getValueType(), LHS, RHS);
11207 
11208   EVT ResultType = Node->getValueType(1);
11209   EVT OType = getSetCCResultType(
11210       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11211 
11212   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11213   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11214   if (isOperationLegal(OpcSat, LHS.getValueType())) {
11215     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11216     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11217     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11218     return;
11219   }
11220 
11221   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11222 
11223   // For an addition, the result should be less than one of the operands (LHS)
11224   // if and only if the other operand (RHS) is negative, otherwise there will
11225   // be overflow.
11226   // For a subtraction, the result should be less than one of the operands
11227   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11228   // otherwise there will be overflow.
11229   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11230   SDValue ConditionRHS =
11231       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11232 
11233   Overflow = DAG.getBoolExtOrTrunc(
11234       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11235       ResultType, ResultType);
11236 }
11237 
// Expand [US]MULO into a multiply plus an overflow check, writing the product
// to \p Result and the (bool-typed) overflow flag to \p Overflow. Returns
// false only when no expansion strategy applies (vector type with none of the
// required operations legal); otherwise returns true.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting the product back down does not reproduce LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Type holding the full double-width product (per scalar element).
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Opcode triples indexed by signedness:
  // { high-half multiply, combined lo/hi multiply, widening extension }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    // Separate MUL + MULH[US] nodes for the two halves of the product.
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    // One [SU]MUL_LOHI node producing both halves at once.
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Extend both operands to double width, multiply there, then split the
    // wide product into its two halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    // Last resort: expand the wide multiply manually (scalars only).
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed: overflow unless the top half equals the sign-extension of the
    // bottom half (i.e. all bits of TopHalf match BottomHalf's sign bit).
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned: overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11317 
11318 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11319   SDLoc dl(Node);
11320   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11321   SDValue Op = Node->getOperand(0);
11322   EVT VT = Op.getValueType();
11323 
11324   if (VT.isScalableVector())
11325     report_fatal_error(
11326         "Expanding reductions for scalable vectors is undefined.");
11327 
11328   // Try to use a shuffle reduction for power of two vectors.
11329   if (VT.isPow2VectorType()) {
11330     while (VT.getVectorNumElements() > 1) {
11331       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11332       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11333         break;
11334 
11335       SDValue Lo, Hi;
11336       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11337       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11338       VT = HalfVT;
11339     }
11340   }
11341 
11342   EVT EltVT = VT.getVectorElementType();
11343   unsigned NumElts = VT.getVectorNumElements();
11344 
11345   SmallVector<SDValue, 8> Ops;
11346   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11347 
11348   SDValue Res = Ops[0];
11349   for (unsigned i = 1; i < NumElts; i++)
11350     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11351 
11352   // Result type may be wider than element type.
11353   if (EltVT != Node->getValueType(0))
11354     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11355   return Res;
11356 }
11357 
11358 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11359   SDLoc dl(Node);
11360   SDValue AccOp = Node->getOperand(0);
11361   SDValue VecOp = Node->getOperand(1);
11362   SDNodeFlags Flags = Node->getFlags();
11363 
11364   EVT VT = VecOp.getValueType();
11365   EVT EltVT = VT.getVectorElementType();
11366 
11367   if (VT.isScalableVector())
11368     report_fatal_error(
11369         "Expanding reductions for scalable vectors is undefined.");
11370 
11371   unsigned NumElts = VT.getVectorNumElements();
11372 
11373   SmallVector<SDValue, 8> Ops;
11374   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11375 
11376   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11377 
11378   SDValue Res = AccOp;
11379   for (unsigned i = 0; i < NumElts; i++)
11380     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11381 
11382   return Res;
11383 }
11384 
11385 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11386                                SelectionDAG &DAG) const {
11387   EVT VT = Node->getValueType(0);
11388   SDLoc dl(Node);
11389   bool isSigned = Node->getOpcode() == ISD::SREM;
11390   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11391   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11392   SDValue Dividend = Node->getOperand(0);
11393   SDValue Divisor = Node->getOperand(1);
11394   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11395     SDVTList VTs = DAG.getVTList(VT, VT);
11396     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11397     return true;
11398   }
11399   if (isOperationLegalOrCustom(DivOpc, VT)) {
11400     // X % Y -> X-X/Y*Y
11401     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11402     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11403     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11404     return true;
11405   }
11406   return false;
11407 }
11408 
// Expand FP_TO_[SU]INT_SAT into a plain FP_TO_[SU]INT conversion surrounded
// by clamping (and, for the signed case, NaN-selection) logic, since the
// saturating semantics require out-of-range and NaN inputs to map to the
// bounds / zero rather than being undefined.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Round the integer bounds toward zero so the FP bounds never exceed the
  // representable integer range.
  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    // SETUO (unordered) is true exactly when Src is NaN.
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
11518 
// Narrow \p Op from OperandVT to \p ResultVT using round-to-odd: the
// narrowed magnitude's low bit is forced to 1 whenever the narrowing was
// inexact, so that a subsequent (second) rounding step cannot produce a
// double-rounding error. Returns \p Op unchanged if the scalar types already
// match. Operates on the absolute value and re-applies the sign at the end.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT WideIntVT = OperandVT.changeTypeToInteger();
  SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
  // Save the sign bit so it can be reattached after the magnitude is rounded.
  SDValue SignBit =
      DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
                  DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
  // |Op| — via FABS when available, otherwise by masking off the sign bit.
  SDValue AbsWide;
  if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
    AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  } else {
    SDValue ClearedSign = DAG.getNode(
        ISD::AND, dl, WideIntVT, OpAsInt,
        DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
    AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
  }
  // Round |Op| to the narrow type, then widen it back so it can be compared
  // against the original wide magnitude.
  SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
  SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       AbsWide.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true when the values are equal OR either is NaN, which covers
  // both the "exact" and "NaN" keep-cases in one comparison.
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  // Shift the saved sign bit down into the narrow type's sign position and
  // OR it back onto the rounded magnitude.
  int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
  SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
  SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
  SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
  Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
11586 
// Expand FP_ROUND when the destination scalar type is bf16: narrow to f32
// with round-to-odd, then round f32 -> bf16 with integer round-to-nearest-even
// arithmetic on the bit pattern. Returns an empty SDValue for any other
// destination type, leaving the node for other legalization paths.
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // Operand 1 == 1 marks the truncating form; lower it to the dedicated
    // FP_TO_BF16 node instead of the manual rounding sequence below.
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    // Detect NaN up front (SETUO: unordered compare of Op with itself).
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
    // Adding 0x7fff plus the bf16 result's lsb implements round-to-nearest,
    // ties-to-even on the 16 bits that will be truncated away.
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    // NOTE(review): this bitcast is to Op's own type (I32), so it looks like
    // a no-op — presumably harmless, but verify whether it can be dropped.
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  return SDValue();
}
11640 
// Expand VECTOR_SPLICE for scalable vectors by spilling both operands to a
// stack slot laid out as CONCAT_VECTORS(V1, V2) and loading the result back
// from the appropriate offset (fixed-length splices use SHUFFLE_VECTOR
// instead and never reach here).
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  // Imm >= 0: splice starts Imm elements into V1.
  // Imm <  0: splice keeps the last -Imm (trailing) elements of V1.
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot big enough for both vectors back to back.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // sizeof(V1) is scalable, so the offset is vscale * min-store-size.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  // If TrailingElts could exceed the runtime vector length, clamp the byte
  // offset to at most one full (scalable) vector.
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
11714 
11715 SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11716                                               SelectionDAG &DAG) const {
11717   SDLoc DL(Node);
11718   SDValue Vec = Node->getOperand(0);
11719   SDValue Mask = Node->getOperand(1);
11720   SDValue Passthru = Node->getOperand(2);
11721 
11722   EVT VecVT = Vec.getValueType();
11723   EVT ScalarVT = VecVT.getScalarType();
11724   EVT MaskVT = Mask.getValueType();
11725   EVT MaskScalarVT = MaskVT.getScalarType();
11726 
11727   // Needs to be handled by targets that have scalable vector types.
11728   if (VecVT.isScalableVector())
11729     report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11730 
11731   SDValue StackPtr = DAG.CreateStackTemporary(
11732       VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11733   int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11734   MachinePointerInfo PtrInfo =
11735       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11736 
11737   MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11738   SDValue Chain = DAG.getEntryNode();
11739   SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11740 
11741   bool HasPassthru = !Passthru.isUndef();
11742 
11743   // If we have a passthru vector, store it on the stack, overwrite the matching
11744   // positions and then re-write the last element that was potentially
11745   // overwritten even though mask[i] = false.
11746   if (HasPassthru)
11747     Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11748 
11749   SDValue LastWriteVal;
11750   APInt PassthruSplatVal;
11751   bool IsSplatPassthru =
11752       ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11753 
11754   if (IsSplatPassthru) {
11755     // As we do not know which position we wrote to last, we cannot simply
11756     // access that index from the passthru vector. So we first check if passthru
11757     // is a splat vector, to use any element ...
11758     LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11759   } else if (HasPassthru) {
11760     // ... if it is not a splat vector, we need to get the passthru value at
11761     // position = popcount(mask) and re-load it from the stack before it is
11762     // overwritten in the loop below.
11763     EVT PopcountVT = ScalarVT.changeTypeToInteger();
11764     SDValue Popcount = DAG.getNode(
11765         ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11766     Popcount =
11767         DAG.getNode(ISD::ZERO_EXTEND, DL,
11768                     MaskVT.changeVectorElementType(PopcountVT), Popcount);
11769     Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11770     SDValue LastElmtPtr =
11771         getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11772     LastWriteVal = DAG.getLoad(
11773         ScalarVT, DL, Chain, LastElmtPtr,
11774         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11775     Chain = LastWriteVal.getValue(1);
11776   }
11777 
11778   unsigned NumElms = VecVT.getVectorNumElements();
11779   for (unsigned I = 0; I < NumElms; I++) {
11780     SDValue Idx = DAG.getVectorIdxConstant(I, DL);
11781 
11782     SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11783     SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11784     Chain = DAG.getStore(
11785         Chain, DL, ValI, OutPtr,
11786         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11787 
11788     // Get the mask value and add it to the current output position. This
11789     // either increments by 1 if MaskI is true or adds 0 otherwise.
11790     // Freeze in case we have poison/undef mask entries.
11791     SDValue MaskI = DAG.getFreeze(
11792         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11793     MaskI = DAG.getFreeze(MaskI);
11794     MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11795     MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11796     OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11797 
11798     if (HasPassthru && I == NumElms - 1) {
11799       SDValue EndOfVector =
11800           DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11801       SDValue AllLanesSelected =
11802           DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11803       OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11804       OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11805 
11806       // Re-write the last ValI if all lanes were selected. Otherwise,
11807       // overwrite the last write it with the passthru value.
11808       SDNodeFlags Flags{};
11809       Flags.setUnpredictable(true);
11810       LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11811                                    LastWriteVal, Flags);
11812       Chain = DAG.getStore(
11813           Chain, DL, LastWriteVal, OutPtr,
11814           MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11815     }
11816   }
11817 
11818   return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11819 }
11820 
11821 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
11822                                            SDValue &LHS, SDValue &RHS,
11823                                            SDValue &CC, SDValue Mask,
11824                                            SDValue EVL, bool &NeedInvert,
11825                                            const SDLoc &dl, SDValue &Chain,
11826                                            bool IsSignaling) const {
11827   MVT OpVT = LHS.getSimpleValueType();
11828   ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11829   NeedInvert = false;
11830   assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11831   bool IsNonVP = !EVL;
11832   switch (getCondCodeAction(CCCode, OpVT)) {
11833   default:
11834     llvm_unreachable("Unknown condition code action!");
11835   case TargetLowering::Legal:
11836     // Nothing to do.
11837     break;
11838   case TargetLowering::Expand: {
11839     ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
11840     if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11841       std::swap(LHS, RHS);
11842       CC = DAG.getCondCode(InvCC);
11843       return true;
11844     }
11845     // Swapping operands didn't work. Try inverting the condition.
11846     bool NeedSwap = false;
11847     InvCC = getSetCCInverse(CCCode, OpVT);
11848     if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
11849       // If inverting the condition is not enough, try swapping operands
11850       // on top of it.
11851       InvCC = ISD::getSetCCSwappedOperands(InvCC);
11852       NeedSwap = true;
11853     }
11854     if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11855       CC = DAG.getCondCode(InvCC);
11856       NeedInvert = true;
11857       if (NeedSwap)
11858         std::swap(LHS, RHS);
11859       return true;
11860     }
11861 
11862     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
11863     unsigned Opc = 0;
11864     switch (CCCode) {
11865     default:
11866       llvm_unreachable("Don't know how to expand this condition!");
11867     case ISD::SETUO:
11868       if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
11869         CC1 = ISD::SETUNE;
11870         CC2 = ISD::SETUNE;
11871         Opc = ISD::OR;
11872         break;
11873       }
11874       assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11875              "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11876       NeedInvert = true;
11877       [[fallthrough]];
11878     case ISD::SETO:
11879       assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11880              "If SETO is expanded, SETOEQ must be legal!");
11881       CC1 = ISD::SETOEQ;
11882       CC2 = ISD::SETOEQ;
11883       Opc = ISD::AND;
11884       break;
11885     case ISD::SETONE:
11886     case ISD::SETUEQ:
11887       // If the SETUO or SETO CC isn't legal, we might be able to use
11888       // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11889       // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11890       // the operands.
11891       CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11892       if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
11893                                           isCondCodeLegal(ISD::SETOLT, OpVT))) {
11894         CC1 = ISD::SETOGT;
11895         CC2 = ISD::SETOLT;
11896         Opc = ISD::OR;
11897         NeedInvert = ((unsigned)CCCode & 0x8U);
11898         break;
11899       }
11900       [[fallthrough]];
11901     case ISD::SETOEQ:
11902     case ISD::SETOGT:
11903     case ISD::SETOGE:
11904     case ISD::SETOLT:
11905     case ISD::SETOLE:
11906     case ISD::SETUNE:
11907     case ISD::SETUGT:
11908     case ISD::SETUGE:
11909     case ISD::SETULT:
11910     case ISD::SETULE:
11911       // If we are floating point, assign and break, otherwise fall through.
11912       if (!OpVT.isInteger()) {
11913         // We can use the 4th bit to tell if we are the unordered
11914         // or ordered version of the opcode.
11915         CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11916         Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
11917         CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11918         break;
11919       }
11920       // Fallthrough if we are unsigned integer.
11921       [[fallthrough]];
11922     case ISD::SETLE:
11923     case ISD::SETGT:
11924     case ISD::SETGE:
11925     case ISD::SETLT:
11926     case ISD::SETNE:
11927     case ISD::SETEQ:
11928       // If all combinations of inverting the condition and swapping operands
11929       // didn't work then we have no means to expand the condition.
11930       llvm_unreachable("Don't know how to expand this condition!");
11931     }
11932 
11933     SDValue SetCC1, SetCC2;
11934     if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11935       // If we aren't the ordered or unorder operation,
11936       // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11937       if (IsNonVP) {
11938         SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11939         SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11940       } else {
11941         SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11942         SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11943       }
11944     } else {
11945       // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11946       if (IsNonVP) {
11947         SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11948         SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11949       } else {
11950         SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11951         SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11952       }
11953     }
11954     if (Chain)
11955       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11956                           SetCC2.getValue(1));
11957     if (IsNonVP)
11958       LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11959     else {
11960       // Transform the binary opcode to the VP equivalent.
11961       assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11962       Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11963       LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11964     }
11965     RHS = SDValue();
11966     CC = SDValue();
11967     return true;
11968   }
11969   }
11970   return false;
11971 }
11972 
11973 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
11974                                                       SelectionDAG &DAG) const {
11975   EVT VT = Node->getValueType(0);
11976   // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
11977   // split into two equal parts.
11978   if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
11979     return SDValue();
11980 
11981   // Restrict expansion to cases where both parts can be concatenated.
11982   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
11983   if (LoVT != HiVT || !isTypeLegal(LoVT))
11984     return SDValue();
11985 
11986   SDLoc DL(Node);
11987   unsigned Opcode = Node->getOpcode();
11988 
11989   // Don't expand if the result is likely to be unrolled anyway.
11990   if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
11991     return SDValue();
11992 
11993   SmallVector<SDValue, 4> LoOps, HiOps;
11994   for (const SDValue &V : Node->op_values()) {
11995     auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
11996     LoOps.push_back(Lo);
11997     HiOps.push_back(Hi);
11998   }
11999 
12000   SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12001   SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12002   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12003 }
12004