xref: /llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (revision cf9d1c1486ef53213b434700a4117d71a2cb67e3)
1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Analysis/VectorUtils.h"
16 #include "llvm/CodeGen/CallingConvLower.h"
17 #include "llvm/CodeGen/CodeGenCommonISel.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineJumpTableInfo.h"
21 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/TargetRegisterInfo.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/Support/DivisionByConstantInfo.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include <cctype>
37 using namespace llvm;
38 
39 /// NOTE: The TargetMachine owns TLOF.
// Trivial constructor: all lowering state lives in TargetLoweringBase.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42 
/// Default implementation: no target-specific node names. Targets override
/// this to pretty-print their custom ISD opcodes in DAG dumps.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46 
/// Forwards to the TargetMachine's relocation-model query.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
50 
51 /// Check whether a given call node is in tail position within its function. If
52 /// so, it sets Chain to the input chain of the tail call.
53 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54                                           SDValue &Chain) const {
55   const Function &F = DAG.getMachineFunction().getFunction();
56 
57   // First, check if tail calls have been disabled in this function.
58   if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59     return false;
60 
61   // Conservatively require the attributes of the call to match those of
62   // the return. Ignore following attributes because they don't affect the
63   // call sequence.
64   AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65   for (const auto &Attr :
66        {Attribute::Alignment, Attribute::Dereferenceable,
67         Attribute::DereferenceableOrNull, Attribute::NoAlias,
68         Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
69     CallerAttrs.removeAttribute(Attr);
70 
71   if (CallerAttrs.hasAttributes())
72     return false;
73 
74   // It's not safe to eliminate the sign / zero extension of the return value.
75   if (CallerAttrs.contains(Attribute::ZExt) ||
76       CallerAttrs.contains(Attribute::SExt))
77     return false;
78 
79   // Check if the only use is a function return node.
80   return isUsedByReturnOnly(Node, Chain);
81 }
82 
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  // Verify that every outgoing argument assigned to a callee-saved register
  // is the unmodified value the caller itself received in that register
  // (i.e. the argument is forwarded untouched).
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    // Stack-passed arguments cannot clobber a callee-saved register.
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    // Look through an AssertZext that argument lowering may have inserted.
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    // The virtual register being copied must be the live-in for Reg itself.
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}
109 
110 /// Set CallLoweringInfo attribute flags based on a call instruction
111 /// and called function attributes.
112 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
113                                                      unsigned ArgIdx) {
114   IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
115   IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
116   IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
117   IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
118   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
119   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
120   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
121   IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
122   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
123   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
124   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
125   IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
126   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
127   Alignment = Call->getParamStackAlign(ArgIdx);
128   IndirectType = nullptr;
129   assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
130          "multiple ABI attributes?");
131   if (IsByVal) {
132     IndirectType = Call->getParamByValType(ArgIdx);
133     if (!Alignment)
134       Alignment = Call->getParamAlign(ArgIdx);
135   }
136   if (IsPreallocated)
137     IndirectType = Call->getParamPreallocatedType(ArgIdx);
138   if (IsInAlloca)
139     IndirectType = Call->getParamInAllocaType(ArgIdx);
140   if (IsSRet)
141     IndirectType = Call->getParamStructRetType(ArgIdx);
142 }
143 
144 /// Generate a libcall taking the given operands as arguments and returning a
145 /// result of type RetVT.
146 std::pair<SDValue, SDValue>
147 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
148                             ArrayRef<SDValue> Ops,
149                             MakeLibCallOptions CallOptions,
150                             const SDLoc &dl,
151                             SDValue InChain) const {
152   if (!InChain)
153     InChain = DAG.getEntryNode();
154 
155   TargetLowering::ArgListTy Args;
156   Args.reserve(Ops.size());
157 
158   TargetLowering::ArgListEntry Entry;
159   for (unsigned i = 0; i < Ops.size(); ++i) {
160     SDValue NewOp = Ops[i];
161     Entry.Node = NewOp;
162     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
163     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
164                                                  CallOptions.IsSExt);
165     Entry.IsZExt = !Entry.IsSExt;
166 
167     if (CallOptions.IsSoften &&
168         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
169       Entry.IsSExt = Entry.IsZExt = false;
170     }
171     Args.push_back(Entry);
172   }
173 
174   if (LC == RTLIB::UNKNOWN_LIBCALL)
175     report_fatal_error("Unsupported library call operation!");
176   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
177                                          getPointerTy(DAG.getDataLayout()));
178 
179   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
180   TargetLowering::CallLoweringInfo CLI(DAG);
181   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
182   bool zeroExtend = !signExtend;
183 
184   if (CallOptions.IsSoften &&
185       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
186     signExtend = zeroExtend = false;
187   }
188 
189   CLI.setDebugLoc(dl)
190       .setChain(InChain)
191       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
192       .setNoReturn(CallOptions.DoesNotReturn)
193       .setDiscardResult(!CallOptions.IsReturnValueUsed)
194       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
195       .setSExtResult(signExtend)
196       .setZExtResult(zeroExtend);
197   return LowerCallTo(CLI);
198 }
199 
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // Bail if the source alignment is lower than the destination's: splitting
  // into wide ops would then need misaligned accesses on the source side.
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Ask the target for a preferred type first; MVT::Other means no opinion.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily cover the remaining size with ops of type VT, stepping VT down
  // whenever the remaining tail is smaller than the current op width.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the integer types until one is safe to use.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    // Respect the target-imposed cap on the number of operations.
    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
289 
290 /// Soften the operands of a comparison. This code is shared among BR_CC,
291 /// SELECT_CC, and SETCC handlers.
292 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
293                                          SDValue &NewLHS, SDValue &NewRHS,
294                                          ISD::CondCode &CCCode,
295                                          const SDLoc &dl, const SDValue OldLHS,
296                                          const SDValue OldRHS) const {
297   SDValue Chain;
298   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
299                              OldRHS, Chain);
300 }
301 
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // NOTE(review): IsSignaling is accepted but not referenced in this body —
  // confirm whether signaling semantics are intentionally unhandled here.

  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC1 is the primary call; LC2
  // is used only by predicates that require two calls (SETONE/SETUEQ).
  // ShouldInvertCC means the chosen libcall computes the inverse predicate,
  // so the resulting condition code must be inverted afterwards.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of the unordered check below.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // Needs two calls: an unordered check plus an ordered-equal check.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  // Record the pre-soften operand types so makeLibCall can decide extensions.
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  // The comparison becomes "libcall result <CCCode> 0".
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call predicates: evaluate both comparisons and combine them with
    // AND (inverted case, SETONE) or OR (SETUEQ).
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // If the caller supplied a chain, merge both calls' output chains.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // A single combined boolean is returned; there is no RHS to compare.
    NewRHS = SDValue();
  }
}
441 
442 /// Return the entry encoding for a jump table in the current function. The
443 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
444 unsigned TargetLowering::getJumpTableEncoding() const {
445   // In non-pic modes, just use the address of a block.
446   if (!isPositionIndependent())
447     return MachineJumpTableInfo::EK_BlockAddress;
448 
449   // In PIC mode, if the target supports a GPRel32 directive, use it.
450   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
451     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
452 
453   // Otherwise, use a label difference.
454   return MachineJumpTableInfo::EK_LabelDifference32;
455 }
456 
457 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
458                                                  SelectionDAG &DAG) const {
459   // If our PIC model is GP relative, use the global offset table as the base.
460   unsigned JTEncoding = getJumpTableEncoding();
461 
462   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
463       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
464     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
465 
466   return Table;
467 }
468 
469 /// This returns the relocation base for the given PIC jumptable, the same as
470 /// getPICJumpTableRelocBase, but as an MCExpr.
471 const MCExpr *
472 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
473                                              unsigned JTI,MCContext &Ctx) const{
474   // The normal PIC reloc base is the label at the start of the jump table.
475   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
476 }
477 
478 SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
479                                                SDValue Addr, int JTI,
480                                                SelectionDAG &DAG) const {
481   SDValue Chain = Value;
482   // Jump table debug info is only needed if CodeView is enabled.
483   if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
484     Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
485   }
486   return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
487 }
488 
489 bool
490 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
491   const TargetMachine &TM = getTargetMachine();
492   const GlobalValue *GV = GA->getGlobal();
493 
494   // If the address is not even local to this DSO we will have to load it from
495   // a got and then add the offset.
496   if (!TM.shouldAssumeDSOLocal(GV))
497     return false;
498 
499   // If the code is position independent we will have to add a base register.
500   if (isPositionIndependent())
501     return false;
502 
503   // Otherwise we can do it.
504   return true;
505 }
506 
507 //===----------------------------------------------------------------------===//
508 //  Optimization Methods
509 //===----------------------------------------------------------------------===//
510 
511 /// If the specified instruction has a constant integer operand and there are
512 /// bits set in that constant that are not demanded, then clear those bits and
513 /// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only handle logic ops with a non-opaque constant RHS.
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // If the constant has bits outside the demanded set, clear them: the
    // logic op yields the same demanded bits with the smaller constant.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
560 
561 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
562                                             const APInt &DemandedBits,
563                                             TargetLoweringOpt &TLO) const {
564   EVT VT = Op.getValueType();
565   APInt DemandedElts = VT.isVector()
566                            ? APInt::getAllOnes(VT.getVectorNumElements())
567                            : APInt(1, 1);
568   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
569 }
570 
571 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
572 /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
573 /// but it could be generalized for targets with other types of implicit
574 /// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.
      // Truncate both operands, apply the op in the narrow type, then widen
      // with ANY_EXTEND — the high bits were not demanded anyway.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
619 
620 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
621                                           DAGCombinerInfo &DCI) const {
622   SelectionDAG &DAG = DCI.DAG;
623   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
624                         !DCI.isBeforeLegalizeOps());
625   KnownBits Known;
626 
627   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
628   if (Simplified) {
629     DCI.AddToWorklist(Op.getNode());
630     DCI.CommitTargetLoweringOpt(TLO);
631   }
632   return Simplified;
633 }
634 
635 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
636                                           const APInt &DemandedElts,
637                                           DAGCombinerInfo &DCI) const {
638   SelectionDAG &DAG = DCI.DAG;
639   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
640                         !DCI.isBeforeLegalizeOps());
641   KnownBits Known;
642 
643   bool Simplified =
644       SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
645   if (Simplified) {
646     DCI.AddToWorklist(Op.getNode());
647     DCI.CommitTargetLoweringOpt(TLO);
648   }
649   return Simplified;
650 }
651 
652 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
653                                           KnownBits &Known,
654                                           TargetLoweringOpt &TLO,
655                                           unsigned Depth,
656                                           bool AssumeSingleUse) const {
657   EVT VT = Op.getValueType();
658 
659   // Since the number of lanes in a scalable vector is unknown at compile time,
660   // we track one bit which is implicitly broadcast to all lanes.  This means
661   // that all lanes in a scalable vector are considered demanded.
662   APInt DemandedElts = VT.isFixedLengthVector()
663                            ? APInt::getAllOnes(VT.getVectorNumElements())
664                            : APInt(1, 1);
665   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
666                               AssumeSingleUse);
667 }
668 
669 // TODO: Under what circumstances can we create nodes? Constant folding?
670 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
671     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
672     SelectionDAG &DAG, unsigned Depth) const {
673   EVT VT = Op.getValueType();
674 
675   // Limit search depth.
676   if (Depth >= SelectionDAG::MaxRecursionDepth)
677     return SDValue();
678 
679   // Ignore UNDEFs.
680   if (Op.isUndef())
681     return SDValue();
682 
683   // Not demanding any bits/elts from Op.
684   if (DemandedBits == 0 || DemandedElts == 0)
685     return DAG.getUNDEF(VT);
686 
687   bool IsLE = DAG.getDataLayout().isLittleEndian();
688   unsigned NumElts = DemandedElts.getBitWidth();
689   unsigned BitWidth = DemandedBits.getBitWidth();
690   KnownBits LHSKnown, RHSKnown;
691   switch (Op.getOpcode()) {
692   case ISD::BITCAST: {
693     if (VT.isScalableVector())
694       return SDValue();
695 
696     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
697     EVT SrcVT = Src.getValueType();
698     EVT DstVT = Op.getValueType();
699     if (SrcVT == DstVT)
700       return Src;
701 
702     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
703     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
704     if (NumSrcEltBits == NumDstEltBits)
705       if (SDValue V = SimplifyMultipleUseDemandedBits(
706               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
707         return DAG.getBitcast(DstVT, V);
708 
709     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
710       unsigned Scale = NumDstEltBits / NumSrcEltBits;
711       unsigned NumSrcElts = SrcVT.getVectorNumElements();
712       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
713       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
714       for (unsigned i = 0; i != Scale; ++i) {
715         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
716         unsigned BitOffset = EltOffset * NumSrcEltBits;
717         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
718         if (!Sub.isZero()) {
719           DemandedSrcBits |= Sub;
720           for (unsigned j = 0; j != NumElts; ++j)
721             if (DemandedElts[j])
722               DemandedSrcElts.setBit((j * Scale) + i);
723         }
724       }
725 
726       if (SDValue V = SimplifyMultipleUseDemandedBits(
727               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
728         return DAG.getBitcast(DstVT, V);
729     }
730 
731     // TODO - bigendian once we have test coverage.
732     if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
733       unsigned Scale = NumSrcEltBits / NumDstEltBits;
734       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
735       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
736       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
737       for (unsigned i = 0; i != NumElts; ++i)
738         if (DemandedElts[i]) {
739           unsigned Offset = (i % Scale) * NumDstEltBits;
740           DemandedSrcBits.insertBits(DemandedBits, Offset);
741           DemandedSrcElts.setBit(i / Scale);
742         }
743 
744       if (SDValue V = SimplifyMultipleUseDemandedBits(
745               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
746         return DAG.getBitcast(DstVT, V);
747     }
748 
749     break;
750   }
751   case ISD::FREEZE: {
752     SDValue N0 = Op.getOperand(0);
753     if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
754                                              /*PoisonOnly=*/false))
755       return N0;
756     break;
757   }
758   case ISD::AND: {
759     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761 
762     // If all of the demanded bits are known 1 on one side, return the other.
763     // These bits cannot contribute to the result of the 'and' in this
764     // context.
765     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
766       return Op.getOperand(0);
767     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
768       return Op.getOperand(1);
769     break;
770   }
771   case ISD::OR: {
772     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774 
775     // If all of the demanded bits are known zero on one side, return the
776     // other.  These bits cannot contribute to the result of the 'or' in this
777     // context.
778     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
779       return Op.getOperand(0);
780     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
781       return Op.getOperand(1);
782     break;
783   }
784   case ISD::XOR: {
785     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
786     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
787 
788     // If all of the demanded bits are known zero on one side, return the
789     // other.
790     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
791       return Op.getOperand(0);
792     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
793       return Op.getOperand(1);
794     break;
795   }
796   case ISD::SHL: {
797     // If we are only demanding sign bits then we can use the shift source
798     // directly.
799     if (std::optional<uint64_t> MaxSA =
800             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
801       SDValue Op0 = Op.getOperand(0);
802       unsigned ShAmt = *MaxSA;
803       unsigned NumSignBits =
804           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
805       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
806       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
807         return Op0;
808     }
809     break;
810   }
811   case ISD::SETCC: {
812     SDValue Op0 = Op.getOperand(0);
813     SDValue Op1 = Op.getOperand(1);
814     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
815     // If (1) we only need the sign-bit, (2) the setcc operands are the same
816     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
817     // -1, we may be able to bypass the setcc.
818     if (DemandedBits.isSignMask() &&
819         Op0.getScalarValueSizeInBits() == BitWidth &&
820         getBooleanContents(Op0.getValueType()) ==
821             BooleanContent::ZeroOrNegativeOneBooleanContent) {
822       // If we're testing X < 0, then this compare isn't needed - just use X!
823       // FIXME: We're limiting to integer types here, but this should also work
824       // if we don't care about FP signed-zero. The use of SETLT with FP means
825       // that we don't care about NaNs.
826       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
827           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
828         return Op0;
829     }
830     break;
831   }
832   case ISD::SIGN_EXTEND_INREG: {
833     // If none of the extended bits are demanded, eliminate the sextinreg.
834     SDValue Op0 = Op.getOperand(0);
835     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
836     unsigned ExBits = ExVT.getScalarSizeInBits();
837     if (DemandedBits.getActiveBits() <= ExBits &&
838         shouldRemoveRedundantExtend(Op))
839       return Op0;
840     // If the input is already sign extended, just drop the extension.
841     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
842     if (NumSignBits >= (BitWidth - ExBits + 1))
843       return Op0;
844     break;
845   }
846   case ISD::ANY_EXTEND_VECTOR_INREG:
847   case ISD::SIGN_EXTEND_VECTOR_INREG:
848   case ISD::ZERO_EXTEND_VECTOR_INREG: {
849     if (VT.isScalableVector())
850       return SDValue();
851 
852     // If we only want the lowest element and none of extended bits, then we can
853     // return the bitcasted source vector.
854     SDValue Src = Op.getOperand(0);
855     EVT SrcVT = Src.getValueType();
856     EVT DstVT = Op.getValueType();
857     if (IsLE && DemandedElts == 1 &&
858         DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
859         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
860       return DAG.getBitcast(DstVT, Src);
861     }
862     break;
863   }
864   case ISD::INSERT_VECTOR_ELT: {
865     if (VT.isScalableVector())
866       return SDValue();
867 
868     // If we don't demand the inserted element, return the base vector.
869     SDValue Vec = Op.getOperand(0);
870     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
871     EVT VecVT = Vec.getValueType();
872     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
873         !DemandedElts[CIdx->getZExtValue()])
874       return Vec;
875     break;
876   }
877   case ISD::INSERT_SUBVECTOR: {
878     if (VT.isScalableVector())
879       return SDValue();
880 
881     SDValue Vec = Op.getOperand(0);
882     SDValue Sub = Op.getOperand(1);
883     uint64_t Idx = Op.getConstantOperandVal(2);
884     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
885     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
886     // If we don't demand the inserted subvector, return the base vector.
887     if (DemandedSubElts == 0)
888       return Vec;
889     break;
890   }
891   case ISD::VECTOR_SHUFFLE: {
892     assert(!VT.isScalableVector());
893     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
894 
895     // If all the demanded elts are from one operand and are inline,
896     // then we can use the operand directly.
897     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
898     for (unsigned i = 0; i != NumElts; ++i) {
899       int M = ShuffleMask[i];
900       if (M < 0 || !DemandedElts[i])
901         continue;
902       AllUndef = false;
903       IdentityLHS &= (M == (int)i);
904       IdentityRHS &= ((M - NumElts) == i);
905     }
906 
907     if (AllUndef)
908       return DAG.getUNDEF(Op.getValueType());
909     if (IdentityLHS)
910       return Op.getOperand(0);
911     if (IdentityRHS)
912       return Op.getOperand(1);
913     break;
914   }
915   default:
916     // TODO: Probably okay to remove after audit; here to reduce change size
917     // in initial enablement patch for scalable vectors
918     if (VT.isScalableVector())
919       return SDValue();
920 
921     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
922       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
923               Op, DemandedBits, DemandedElts, DAG, Depth))
924         return V;
925     break;
926   }
927   return SDValue();
928 }
929 
930 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
931     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
932     unsigned Depth) const {
933   EVT VT = Op.getValueType();
934   // Since the number of lanes in a scalable vector is unknown at compile time,
935   // we track one bit which is implicitly broadcast to all lanes.  This means
936   // that all lanes in a scalable vector are considered demanded.
937   APInt DemandedElts = VT.isFixedLengthVector()
938                            ? APInt::getAllOnes(VT.getVectorNumElements())
939                            : APInt(1, 1);
940   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
941                                          Depth);
942 }
943 
944 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
945     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
946     unsigned Depth) const {
947   APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
948   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
949                                          Depth);
950 }
951 
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// Returns the replacement value on success, or an empty SDValue if the
// pattern does not match or the transform would not be profitable/legal.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  // For the avgceil forms, Add2 records the inner add node so its overflow
  // flags can be checked later; it stays null for the avgfloor form.
  SDValue Add2;
  // Given three candidate operands {Op1, Op2, Op3}, succeed if Op2 or Op3 is
  // the splat constant 1: the remaining two values become the averaged
  // operands (ExtOpA/ExtOpB) and A is remembered as the inner add (Add2).
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Try to peel a "+1" out of either add operand; if found this is the
  // rounding (ceil) form of the average.
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  // Number of high bits known redundant (sign or zero) in both operands;
  // used below to size the narrower type for the average node.
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // The add of two values with S common sign bits keeps at least S-1 of them.
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Decide between the signed and unsigned average flavour, preferring
  // whichever gives more known redundant high bits.
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  // Clamp to at least i8, then round up to the next power-of-2 width.
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create a AVGFLOOR node with a scalar constant unless its legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  // Narrow (or keep) the operands, emit the average, then widen/truncate the
  // result back to the original type.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1093 
1094 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1095 /// result of Op are ever used downstream. If we can use this information to
1096 /// simplify Op, create a new simplified DAG node and return true, returning the
1097 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1098 /// return a mask of Known bits for the expression (used to simplify the
1099 /// caller).  The Known bits may only be accurate for those bits in the
1100 /// OriginalDemandedBits and OriginalDemandedElts.
1101 bool TargetLowering::SimplifyDemandedBits(
1102     SDValue Op, const APInt &OriginalDemandedBits,
1103     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1104     unsigned Depth, bool AssumeSingleUse) const {
1105   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1106   assert(Op.getScalarValueSizeInBits() == BitWidth &&
1107          "Mask size mismatches value type size!");
1108 
1109   // Don't know anything.
1110   Known = KnownBits(BitWidth);
1111 
1112   EVT VT = Op.getValueType();
1113   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1114   unsigned NumElts = OriginalDemandedElts.getBitWidth();
1115   assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1116          "Unexpected vector size");
1117 
1118   APInt DemandedBits = OriginalDemandedBits;
1119   APInt DemandedElts = OriginalDemandedElts;
1120   SDLoc dl(Op);
1121 
1122   // Undef operand.
1123   if (Op.isUndef())
1124     return false;
1125 
1126   // We can't simplify target constants.
1127   if (Op.getOpcode() == ISD::TargetConstant)
1128     return false;
1129 
1130   if (Op.getOpcode() == ISD::Constant) {
1131     // We know all of the bits for a constant!
1132     Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1133     return false;
1134   }
1135 
1136   if (Op.getOpcode() == ISD::ConstantFP) {
1137     // We know all of the bits for a floating point constant!
1138     Known = KnownBits::makeConstant(
1139         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1140     return false;
1141   }
1142 
1143   // Other users may use these bits.
1144   bool HasMultiUse = false;
1145   if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1146     if (Depth >= SelectionDAG::MaxRecursionDepth) {
1147       // Limit search depth.
1148       return false;
1149     }
1150     // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1151     DemandedBits = APInt::getAllOnes(BitWidth);
1152     DemandedElts = APInt::getAllOnes(NumElts);
1153     HasMultiUse = true;
1154   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1155     // Not demanding any bits/elts from Op.
1156     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1157   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1158     // Limit search depth.
1159     return false;
1160   }
1161 
1162   KnownBits Known2;
1163   switch (Op.getOpcode()) {
1164   case ISD::SCALAR_TO_VECTOR: {
1165     if (VT.isScalableVector())
1166       return false;
1167     if (!DemandedElts[0])
1168       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1169 
1170     KnownBits SrcKnown;
1171     SDValue Src = Op.getOperand(0);
1172     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1173     APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1174     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1175       return true;
1176 
1177     // Upper elements are undef, so only get the knownbits if we just demand
1178     // the bottom element.
1179     if (DemandedElts == 1)
1180       Known = SrcKnown.anyextOrTrunc(BitWidth);
1181     break;
1182   }
1183   case ISD::BUILD_VECTOR:
1184     // Collect the known bits that are shared by every demanded element.
1185     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1186     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1187     return false; // Don't fall through, will infinitely loop.
1188   case ISD::SPLAT_VECTOR: {
1189     SDValue Scl = Op.getOperand(0);
1190     APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1191     KnownBits KnownScl;
1192     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1193       return true;
1194 
1195     // Implicitly truncate the bits to match the official semantics of
1196     // SPLAT_VECTOR.
1197     Known = KnownScl.trunc(BitWidth);
1198     break;
1199   }
1200   case ISD::LOAD: {
1201     auto *LD = cast<LoadSDNode>(Op);
1202     if (getTargetConstantFromLoad(LD)) {
1203       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1204       return false; // Don't fall through, will infinitely loop.
1205     }
1206     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1207       // If this is a ZEXTLoad and we are looking at the loaded value.
1208       EVT MemVT = LD->getMemoryVT();
1209       unsigned MemBits = MemVT.getScalarSizeInBits();
1210       Known.Zero.setBitsFrom(MemBits);
1211       return false; // Don't fall through, will infinitely loop.
1212     }
1213     break;
1214   }
1215   case ISD::INSERT_VECTOR_ELT: {
1216     if (VT.isScalableVector())
1217       return false;
1218     SDValue Vec = Op.getOperand(0);
1219     SDValue Scl = Op.getOperand(1);
1220     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1221     EVT VecVT = Vec.getValueType();
1222 
1223     // If index isn't constant, assume we need all vector elements AND the
1224     // inserted element.
1225     APInt DemandedVecElts(DemandedElts);
1226     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1227       unsigned Idx = CIdx->getZExtValue();
1228       DemandedVecElts.clearBit(Idx);
1229 
1230       // Inserted element is not required.
1231       if (!DemandedElts[Idx])
1232         return TLO.CombineTo(Op, Vec);
1233     }
1234 
1235     KnownBits KnownScl;
1236     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1237     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1238     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1239       return true;
1240 
1241     Known = KnownScl.anyextOrTrunc(BitWidth);
1242 
1243     KnownBits KnownVec;
1244     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1245                              Depth + 1))
1246       return true;
1247 
1248     if (!!DemandedVecElts)
1249       Known = Known.intersectWith(KnownVec);
1250 
1251     return false;
1252   }
1253   case ISD::INSERT_SUBVECTOR: {
1254     if (VT.isScalableVector())
1255       return false;
1256     // Demand any elements from the subvector and the remainder from the src its
1257     // inserted into.
1258     SDValue Src = Op.getOperand(0);
1259     SDValue Sub = Op.getOperand(1);
1260     uint64_t Idx = Op.getConstantOperandVal(2);
1261     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1262     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1263     APInt DemandedSrcElts = DemandedElts;
1264     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1265 
1266     KnownBits KnownSub, KnownSrc;
1267     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1268                              Depth + 1))
1269       return true;
1270     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1271                              Depth + 1))
1272       return true;
1273 
1274     Known.Zero.setAllBits();
1275     Known.One.setAllBits();
1276     if (!!DemandedSubElts)
1277       Known = Known.intersectWith(KnownSub);
1278     if (!!DemandedSrcElts)
1279       Known = Known.intersectWith(KnownSrc);
1280 
1281     // Attempt to avoid multi-use src if we don't need anything from it.
1282     if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1283         !DemandedSrcElts.isAllOnes()) {
1284       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1285           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1286       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1287           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1288       if (NewSub || NewSrc) {
1289         NewSub = NewSub ? NewSub : Sub;
1290         NewSrc = NewSrc ? NewSrc : Src;
1291         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1292                                         Op.getOperand(2));
1293         return TLO.CombineTo(Op, NewOp);
1294       }
1295     }
1296     break;
1297   }
1298   case ISD::EXTRACT_SUBVECTOR: {
1299     if (VT.isScalableVector())
1300       return false;
1301     // Offset the demanded elts by the subvector index.
1302     SDValue Src = Op.getOperand(0);
1303     if (Src.getValueType().isScalableVector())
1304       break;
1305     uint64_t Idx = Op.getConstantOperandVal(1);
1306     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1307     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1308 
1309     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1310                              Depth + 1))
1311       return true;
1312 
1313     // Attempt to avoid multi-use src if we don't need anything from it.
1314     if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1315       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1316           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1317       if (DemandedSrc) {
1318         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1319                                         Op.getOperand(1));
1320         return TLO.CombineTo(Op, NewOp);
1321       }
1322     }
1323     break;
1324   }
1325   case ISD::CONCAT_VECTORS: {
1326     if (VT.isScalableVector())
1327       return false;
1328     Known.Zero.setAllBits();
1329     Known.One.setAllBits();
1330     EVT SubVT = Op.getOperand(0).getValueType();
1331     unsigned NumSubVecs = Op.getNumOperands();
1332     unsigned NumSubElts = SubVT.getVectorNumElements();
1333     for (unsigned i = 0; i != NumSubVecs; ++i) {
1334       APInt DemandedSubElts =
1335           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1336       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1337                                Known2, TLO, Depth + 1))
1338         return true;
1339       // Known bits are shared by every demanded subvector element.
1340       if (!!DemandedSubElts)
1341         Known = Known.intersectWith(Known2);
1342     }
1343     break;
1344   }
1345   case ISD::VECTOR_SHUFFLE: {
1346     assert(!VT.isScalableVector());
1347     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1348 
1349     // Collect demanded elements from shuffle operands..
1350     APInt DemandedLHS, DemandedRHS;
1351     if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1352                                 DemandedRHS))
1353       break;
1354 
1355     if (!!DemandedLHS || !!DemandedRHS) {
1356       SDValue Op0 = Op.getOperand(0);
1357       SDValue Op1 = Op.getOperand(1);
1358 
1359       Known.Zero.setAllBits();
1360       Known.One.setAllBits();
1361       if (!!DemandedLHS) {
1362         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1363                                  Depth + 1))
1364           return true;
1365         Known = Known.intersectWith(Known2);
1366       }
1367       if (!!DemandedRHS) {
1368         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1369                                  Depth + 1))
1370           return true;
1371         Known = Known.intersectWith(Known2);
1372       }
1373 
1374       // Attempt to avoid multi-use ops if we don't need anything from them.
1375       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1377       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1379       if (DemandedOp0 || DemandedOp1) {
1380         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1383         return TLO.CombineTo(Op, NewOp);
1384       }
1385     }
1386     break;
1387   }
1388   case ISD::AND: {
1389     SDValue Op0 = Op.getOperand(0);
1390     SDValue Op1 = Op.getOperand(1);
1391 
1392     // If the RHS is a constant, check to see if the LHS would be zero without
1393     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1394     // simplify the LHS, here we're using information from the LHS to simplify
1395     // the RHS.
1396     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1397       // Do not increment Depth here; that can cause an infinite loop.
1398       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1399       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1400       if ((LHSKnown.Zero & DemandedBits) ==
1401           (~RHSC->getAPIntValue() & DemandedBits))
1402         return TLO.CombineTo(Op, Op0);
1403 
1404       // If any of the set bits in the RHS are known zero on the LHS, shrink
1405       // the constant.
1406       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1407                                  DemandedElts, TLO))
1408         return true;
1409 
1410       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1411       // constant, but if this 'and' is only clearing bits that were just set by
1412       // the xor, then this 'and' can be eliminated by shrinking the mask of
1413       // the xor. For example, for a 32-bit X:
1414       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1415       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1416           LHSKnown.One == ~RHSC->getAPIntValue()) {
1417         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1418         return TLO.CombineTo(Op, Xor);
1419       }
1420     }
1421 
1422     // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1423     // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1424     if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1425         (Op0.getOperand(0).isUndef() ||
1426          ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1427         Op0->hasOneUse()) {
1428       unsigned NumSubElts =
1429           Op0.getOperand(1).getValueType().getVectorNumElements();
1430       unsigned SubIdx = Op0.getConstantOperandVal(2);
1431       APInt DemandedSub =
1432           APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1433       KnownBits KnownSubMask =
1434           TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1435       if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1436         SDValue NewAnd =
1437             TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1438         SDValue NewInsert =
1439             TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1440                             Op0.getOperand(1), Op0.getOperand(2));
1441         return TLO.CombineTo(Op, NewInsert);
1442       }
1443     }
1444 
1445     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1446                              Depth + 1))
1447       return true;
1448     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1449                              Known2, TLO, Depth + 1))
1450       return true;
1451 
1452     // If all of the demanded bits are known one on one side, return the other.
1453     // These bits cannot contribute to the result of the 'and'.
1454     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1455       return TLO.CombineTo(Op, Op0);
1456     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1457       return TLO.CombineTo(Op, Op1);
1458     // If all of the demanded bits in the inputs are known zeros, return zero.
1459     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1460       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1461     // If the RHS is a constant, see if we can simplify it.
1462     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1463                                TLO))
1464       return true;
1465     // If the operation can be done in a smaller type, do so.
1466     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1467       return true;
1468 
1469     // Attempt to avoid multi-use ops if we don't need anything from them.
1470     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1471       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1472           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1473       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1474           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1475       if (DemandedOp0 || DemandedOp1) {
1476         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1477         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1478         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1479         return TLO.CombineTo(Op, NewOp);
1480       }
1481     }
1482 
1483     Known &= Known2;
1484     break;
1485   }
1486   case ISD::OR: {
1487     SDValue Op0 = Op.getOperand(0);
1488     SDValue Op1 = Op.getOperand(1);
1489     SDNodeFlags Flags = Op.getNode()->getFlags();
1490     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1491                              Depth + 1)) {
1492       Op->dropFlags(SDNodeFlags::Disjoint);
1493       return true;
1494     }
1495 
1496     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1497                              Known2, TLO, Depth + 1)) {
1498       Op->dropFlags(SDNodeFlags::Disjoint);
1499       return true;
1500     }
1501 
1502     // If all of the demanded bits are known zero on one side, return the other.
1503     // These bits cannot contribute to the result of the 'or'.
1504     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1505       return TLO.CombineTo(Op, Op0);
1506     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1507       return TLO.CombineTo(Op, Op1);
1508     // If the RHS is a constant, see if we can simplify it.
1509     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1510       return true;
1511     // If the operation can be done in a smaller type, do so.
1512     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1513       return true;
1514 
1515     // Attempt to avoid multi-use ops if we don't need anything from them.
1516     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1517       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1518           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1519       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1520           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1521       if (DemandedOp0 || DemandedOp1) {
1522         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1523         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1524         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1525         return TLO.CombineTo(Op, NewOp);
1526       }
1527     }
1528 
1529     // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1530     // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1531     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1532         Op0->hasOneUse() && Op1->hasOneUse()) {
1533       // Attempt to match all commutations - m_c_Or would've been useful!
1534       for (int I = 0; I != 2; ++I) {
1535         SDValue X = Op.getOperand(I).getOperand(0);
1536         SDValue C1 = Op.getOperand(I).getOperand(1);
1537         SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1538         SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1539         if (Alt.getOpcode() == ISD::OR) {
1540           for (int J = 0; J != 2; ++J) {
1541             if (X == Alt.getOperand(J)) {
1542               SDValue Y = Alt.getOperand(1 - J);
1543               if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1544                                                                {C1, C2})) {
1545                 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1546                 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1547                 return TLO.CombineTo(
1548                     Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1549               }
1550             }
1551           }
1552         }
1553       }
1554     }
1555 
1556     Known |= Known2;
1557     break;
1558   }
1559   case ISD::XOR: {
1560     SDValue Op0 = Op.getOperand(0);
1561     SDValue Op1 = Op.getOperand(1);
1562 
1563     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1564                              Depth + 1))
1565       return true;
1566     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1567                              Depth + 1))
1568       return true;
1569 
1570     // If all of the demanded bits are known zero on one side, return the other.
1571     // These bits cannot contribute to the result of the 'xor'.
1572     if (DemandedBits.isSubsetOf(Known.Zero))
1573       return TLO.CombineTo(Op, Op0);
1574     if (DemandedBits.isSubsetOf(Known2.Zero))
1575       return TLO.CombineTo(Op, Op1);
1576     // If the operation can be done in a smaller type, do so.
1577     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1578       return true;
1579 
1580     // If all of the unknown bits are known to be zero on one side or the other
1581     // turn this into an *inclusive* or.
1582     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1583     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1584       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1585 
1586     ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1587     if (C) {
1588       // If one side is a constant, and all of the set bits in the constant are
1589       // also known set on the other side, turn this into an AND, as we know
1590       // the bits will be cleared.
1591       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1592       // NB: it is okay if more bits are known than are requested
1593       if (C->getAPIntValue() == Known2.One) {
1594         SDValue ANDC =
1595             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1596         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1597       }
1598 
1599       // If the RHS is a constant, see if we can change it. Don't alter a -1
1600       // constant because that's a 'not' op, and that is better for combining
1601       // and codegen.
1602       if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1603         // We're flipping all demanded bits. Flip the undemanded bits too.
1604         SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1605         return TLO.CombineTo(Op, New);
1606       }
1607 
1608       unsigned Op0Opcode = Op0.getOpcode();
1609       if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1610         if (ConstantSDNode *ShiftC =
1611                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1612           // Don't crash on an oversized shift. We can not guarantee that a
1613           // bogus shift has been simplified to undef.
1614           if (ShiftC->getAPIntValue().ult(BitWidth)) {
1615             uint64_t ShiftAmt = ShiftC->getZExtValue();
1616             APInt Ones = APInt::getAllOnes(BitWidth);
1617             Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1618                                          : Ones.lshr(ShiftAmt);
1619             if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1620                 isDesirableToCommuteXorWithShift(Op.getNode())) {
1621               // If the xor constant is a demanded mask, do a 'not' before the
1622               // shift:
1623               // xor (X << ShiftC), XorC --> (not X) << ShiftC
1624               // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1625               SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1626               return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1627                                                        Op0.getOperand(1)));
1628             }
1629           }
1630         }
1631       }
1632     }
1633 
1634     // If we can't turn this into a 'not', try to shrink the constant.
1635     if (!C || !C->isAllOnes())
1636       if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1637         return true;
1638 
1639     // Attempt to avoid multi-use ops if we don't need anything from them.
1640     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1641       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1642           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1643       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1644           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1645       if (DemandedOp0 || DemandedOp1) {
1646         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1647         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1648         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1649         return TLO.CombineTo(Op, NewOp);
1650       }
1651     }
1652 
1653     Known ^= Known2;
1654     break;
1655   }
1656   case ISD::SELECT:
1657     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1658                              Known, TLO, Depth + 1))
1659       return true;
1660     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1661                              Known2, TLO, Depth + 1))
1662       return true;
1663 
1664     // If the operands are constants, see if we can simplify them.
1665     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1666       return true;
1667 
1668     // Only known if known in both the LHS and RHS.
1669     Known = Known.intersectWith(Known2);
1670     break;
1671   case ISD::VSELECT:
1672     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1673                              Known, TLO, Depth + 1))
1674       return true;
1675     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1676                              Known2, TLO, Depth + 1))
1677       return true;
1678 
1679     // Only known if known in both the LHS and RHS.
1680     Known = Known.intersectWith(Known2);
1681     break;
1682   case ISD::SELECT_CC:
1683     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1684                              Known, TLO, Depth + 1))
1685       return true;
1686     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1687                              Known2, TLO, Depth + 1))
1688       return true;
1689 
1690     // If the operands are constants, see if we can simplify them.
1691     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1692       return true;
1693 
1694     // Only known if known in both the LHS and RHS.
1695     Known = Known.intersectWith(Known2);
1696     break;
1697   case ISD::SETCC: {
1698     SDValue Op0 = Op.getOperand(0);
1699     SDValue Op1 = Op.getOperand(1);
1700     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1701     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1702     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1703     // -1, we may be able to bypass the setcc.
1704     if (DemandedBits.isSignMask() &&
1705         Op0.getScalarValueSizeInBits() == BitWidth &&
1706         getBooleanContents(Op0.getValueType()) ==
1707             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1708       // If we're testing X < 0, then this compare isn't needed - just use X!
1709       // FIXME: We're limiting to integer types here, but this should also work
1710       // if we don't care about FP signed-zero. The use of SETLT with FP means
1711       // that we don't care about NaNs.
1712       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1713           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1714         return TLO.CombineTo(Op, Op0);
1715 
1716       // TODO: Should we check for other forms of sign-bit comparisons?
1717       // Examples: X <= -1, X >= 0
1718     }
1719     if (getBooleanContents(Op0.getValueType()) ==
1720             TargetLowering::ZeroOrOneBooleanContent &&
1721         BitWidth > 1)
1722       Known.Zero.setBitsFrom(1);
1723     break;
1724   }
1725   case ISD::SHL: {
1726     SDValue Op0 = Op.getOperand(0);
1727     SDValue Op1 = Op.getOperand(1);
1728     EVT ShiftVT = Op1.getValueType();
1729 
1730     if (std::optional<uint64_t> KnownSA =
1731             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1732       unsigned ShAmt = *KnownSA;
1733       if (ShAmt == 0)
1734         return TLO.CombineTo(Op, Op0);
1735 
1736       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1737       // single shift.  We can do this if the bottom bits (which are shifted
1738       // out) are never demanded.
1739       // TODO - support non-uniform vector amounts.
1740       if (Op0.getOpcode() == ISD::SRL) {
1741         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1742           if (std::optional<uint64_t> InnerSA =
1743                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1744             unsigned C1 = *InnerSA;
1745             unsigned Opc = ISD::SHL;
1746             int Diff = ShAmt - C1;
1747             if (Diff < 0) {
1748               Diff = -Diff;
1749               Opc = ISD::SRL;
1750             }
1751             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1752             return TLO.CombineTo(
1753                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1754           }
1755         }
1756       }
1757 
1758       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1759       // are not demanded. This will likely allow the anyext to be folded away.
1760       // TODO - support non-uniform vector amounts.
1761       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1762         SDValue InnerOp = Op0.getOperand(0);
1763         EVT InnerVT = InnerOp.getValueType();
1764         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1765         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1766             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1767           SDValue NarrowShl = TLO.DAG.getNode(
1768               ISD::SHL, dl, InnerVT, InnerOp,
1769               TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1770           return TLO.CombineTo(
1771               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1772         }
1773 
1774         // Repeat the SHL optimization above in cases where an extension
1775         // intervenes: (shl (anyext (shr x, c1)), c2) to
1776         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1777         // aren't demanded (as above) and that the shifted upper c1 bits of
1778         // x aren't demanded.
1779         // TODO - support non-uniform vector amounts.
1780         if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1781             InnerOp.hasOneUse()) {
1782           if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1783                   InnerOp, DemandedElts, Depth + 2)) {
1784             unsigned InnerShAmt = *SA2;
1785             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1786                 DemandedBits.getActiveBits() <=
1787                     (InnerBits - InnerShAmt + ShAmt) &&
1788                 DemandedBits.countr_zero() >= ShAmt) {
1789               SDValue NewSA =
1790                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1791               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1792                                                InnerOp.getOperand(0));
1793               return TLO.CombineTo(
1794                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1795             }
1796           }
1797         }
1798       }
1799 
1800       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1801       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1802                                Depth + 1)) {
1803         // Disable the nsw and nuw flags. We can no longer guarantee that we
1804         // won't wrap after simplification.
1805         Op->dropFlags(SDNodeFlags::NoWrap);
1806         return true;
1807       }
1808       Known.Zero <<= ShAmt;
1809       Known.One <<= ShAmt;
1810       // low bits known zero.
1811       Known.Zero.setLowBits(ShAmt);
1812 
1813       // Attempt to avoid multi-use ops if we don't need anything from them.
1814       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1815         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1816             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1817         if (DemandedOp0) {
1818           SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1819           return TLO.CombineTo(Op, NewOp);
1820         }
1821       }
1822 
1823       // TODO: Can we merge this fold with the one below?
1824       // Try shrinking the operation as long as the shift amount will still be
1825       // in range.
1826       if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1827           Op.getNode()->hasOneUse()) {
1828         // Search for the smallest integer type with free casts to and from
1829         // Op's type. For expedience, just check power-of-2 integer types.
1830         unsigned DemandedSize = DemandedBits.getActiveBits();
1831         for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1832              SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1833           EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1834           if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1835               isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1836               isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1837               (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1838             assert(DemandedSize <= SmallVTBits &&
1839                    "Narrowed below demanded bits?");
1840             // We found a type with free casts.
1841             SDValue NarrowShl = TLO.DAG.getNode(
1842                 ISD::SHL, dl, SmallVT,
1843                 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1844                 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1845             return TLO.CombineTo(
1846                 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1847           }
1848         }
1849       }
1850 
1851       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1852       // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1853       // Only do this if we demand the upper half so the knownbits are correct.
1854       unsigned HalfWidth = BitWidth / 2;
1855       if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1856           DemandedBits.countLeadingOnes() >= HalfWidth) {
1857         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1858         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1859             isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1860             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1861             (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1862           // If we're demanding the upper bits at all, we must ensure
1863           // that the upper bits of the shift result are known to be zero,
1864           // which is equivalent to the narrow shift being NUW.
1865           if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1866             bool IsNSW = Known.countMinSignBits() > HalfWidth;
1867             SDNodeFlags Flags;
1868             Flags.setNoSignedWrap(IsNSW);
1869             Flags.setNoUnsignedWrap(IsNUW);
1870             SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1871             SDValue NewShiftAmt =
1872                 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1873             SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1874                                                NewShiftAmt, Flags);
1875             SDValue NewExt =
1876                 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1877             return TLO.CombineTo(Op, NewExt);
1878           }
1879         }
1880       }
1881     } else {
1882       // This is a variable shift, so we can't shift the demand mask by a known
1883       // amount. But if we are not demanding high bits, then we are not
1884       // demanding those bits from the pre-shifted operand either.
1885       if (unsigned CTLZ = DemandedBits.countl_zero()) {
1886         APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1887         if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1888                                  Depth + 1)) {
1889           // Disable the nsw and nuw flags. We can no longer guarantee that we
1890           // won't wrap after simplification.
1891           Op->dropFlags(SDNodeFlags::NoWrap);
1892           return true;
1893         }
1894         Known.resetAll();
1895       }
1896     }
1897 
1898     // If we are only demanding sign bits then we can use the shift source
1899     // directly.
1900     if (std::optional<uint64_t> MaxSA =
1901             TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1902       unsigned ShAmt = *MaxSA;
1903       unsigned NumSignBits =
1904           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1905       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1906       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1907         return TLO.CombineTo(Op, Op0);
1908     }
1909     break;
1910   }
1911   case ISD::SRL: {
1912     SDValue Op0 = Op.getOperand(0);
1913     SDValue Op1 = Op.getOperand(1);
1914     EVT ShiftVT = Op1.getValueType();
1915 
1916     if (std::optional<uint64_t> KnownSA =
1917             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1918       unsigned ShAmt = *KnownSA;
1919       if (ShAmt == 0)
1920         return TLO.CombineTo(Op, Op0);
1921 
1922       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1923       // single shift.  We can do this if the top bits (which are shifted out)
1924       // are never demanded.
1925       // TODO - support non-uniform vector amounts.
1926       if (Op0.getOpcode() == ISD::SHL) {
1927         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1928           if (std::optional<uint64_t> InnerSA =
1929                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1930             unsigned C1 = *InnerSA;
1931             unsigned Opc = ISD::SRL;
1932             int Diff = ShAmt - C1;
1933             if (Diff < 0) {
1934               Diff = -Diff;
1935               Opc = ISD::SHL;
1936             }
1937             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1938             return TLO.CombineTo(
1939                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1940           }
1941         }
1942       }
1943 
1944       // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1945       // single sra. We can do this if the top bits are never demanded.
1946       if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1947         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1948           if (std::optional<uint64_t> InnerSA =
1949                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1950             unsigned C1 = *InnerSA;
1951             // Clamp the combined shift amount if it exceeds the bit width.
1952             unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1953             SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1954             return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1955                                                      Op0.getOperand(0), NewSA));
1956           }
1957         }
1958       }
1959 
1960       APInt InDemandedMask = (DemandedBits << ShAmt);
1961 
1962       // If the shift is exact, then it does demand the low bits (and knows that
1963       // they are zero).
1964       if (Op->getFlags().hasExact())
1965         InDemandedMask.setLowBits(ShAmt);
1966 
1967       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1968       // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1969       if ((BitWidth % 2) == 0 && !VT.isVector()) {
1970         APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
1971         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1972         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1973             isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1974             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1975             (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1976             ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1977              TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1978           SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1979           SDValue NewShiftAmt =
1980               TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1981           SDValue NewShift =
1982               TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1983           return TLO.CombineTo(
1984               Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1985         }
1986       }
1987 
1988       // Compute the new bits that are at the top now.
1989       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1990                                Depth + 1))
1991         return true;
1992       Known.Zero.lshrInPlace(ShAmt);
1993       Known.One.lshrInPlace(ShAmt);
1994       // High bits known zero.
1995       Known.Zero.setHighBits(ShAmt);
1996 
1997       // Attempt to avoid multi-use ops if we don't need anything from them.
1998       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1999         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2000             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2001         if (DemandedOp0) {
2002           SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2003           return TLO.CombineTo(Op, NewOp);
2004         }
2005       }
2006     } else {
2007       // Use generic knownbits computation as it has support for non-uniform
2008       // shift amounts.
2009       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2010     }
2011 
2012     // Try to match AVG patterns (after shift simplification).
2013     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2014                                         DemandedElts, Depth + 1))
2015       return TLO.CombineTo(Op, AVG);
2016 
2017     break;
2018   }
2019   case ISD::SRA: {
2020     SDValue Op0 = Op.getOperand(0);
2021     SDValue Op1 = Op.getOperand(1);
2022     EVT ShiftVT = Op1.getValueType();
2023 
2024     // If we only want bits that already match the signbit then we don't need
2025     // to shift.
2026     unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2027     if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2028         NumHiDemandedBits)
2029       return TLO.CombineTo(Op, Op0);
2030 
2031     // If this is an arithmetic shift right and only the low-bit is set, we can
2032     // always convert this into a logical shr, even if the shift amount is
2033     // variable.  The low bit of the shift cannot be an input sign bit unless
2034     // the shift amount is >= the size of the datatype, which is undefined.
2035     if (DemandedBits.isOne())
2036       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2037 
2038     if (std::optional<uint64_t> KnownSA =
2039             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2040       unsigned ShAmt = *KnownSA;
2041       if (ShAmt == 0)
2042         return TLO.CombineTo(Op, Op0);
2043 
2044       // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2045       // supports sext_inreg.
2046       if (Op0.getOpcode() == ISD::SHL) {
2047         if (std::optional<uint64_t> InnerSA =
2048                 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2049           unsigned LowBits = BitWidth - ShAmt;
2050           EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2051           if (VT.isVector())
2052             ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2053                                      VT.getVectorElementCount());
2054 
2055           if (*InnerSA == ShAmt) {
2056             if (!TLO.LegalOperations() ||
2057                 getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
2058               return TLO.CombineTo(
2059                   Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2060                                       Op0.getOperand(0),
2061                                       TLO.DAG.getValueType(ExtVT)));
2062 
2063             // Even if we can't convert to sext_inreg, we might be able to
2064             // remove this shift pair if the input is already sign extended.
2065             unsigned NumSignBits =
2066                 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2067             if (NumSignBits > ShAmt)
2068               return TLO.CombineTo(Op, Op0.getOperand(0));
2069           }
2070         }
2071       }
2072 
2073       APInt InDemandedMask = (DemandedBits << ShAmt);
2074 
2075       // If the shift is exact, then it does demand the low bits (and knows that
2076       // they are zero).
2077       if (Op->getFlags().hasExact())
2078         InDemandedMask.setLowBits(ShAmt);
2079 
2080       // If any of the demanded bits are produced by the sign extension, we also
2081       // demand the input sign bit.
2082       if (DemandedBits.countl_zero() < ShAmt)
2083         InDemandedMask.setSignBit();
2084 
2085       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2086                                Depth + 1))
2087         return true;
2088       Known.Zero.lshrInPlace(ShAmt);
2089       Known.One.lshrInPlace(ShAmt);
2090 
2091       // If the input sign bit is known to be zero, or if none of the top bits
2092       // are demanded, turn this into an unsigned shift right.
2093       if (Known.Zero[BitWidth - ShAmt - 1] ||
2094           DemandedBits.countl_zero() >= ShAmt) {
2095         SDNodeFlags Flags;
2096         Flags.setExact(Op->getFlags().hasExact());
2097         return TLO.CombineTo(
2098             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2099       }
2100 
2101       int Log2 = DemandedBits.exactLogBase2();
2102       if (Log2 >= 0) {
2103         // The bit must come from the sign.
2104         SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2105         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2106       }
2107 
2108       if (Known.One[BitWidth - ShAmt - 1])
2109         // New bits are known one.
2110         Known.One.setHighBits(ShAmt);
2111 
2112       // Attempt to avoid multi-use ops if we don't need anything from them.
2113       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2114         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2115             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2116         if (DemandedOp0) {
2117           SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2118           return TLO.CombineTo(Op, NewOp);
2119         }
2120       }
2121     }
2122 
2123     // Try to match AVG patterns (after shift simplification).
2124     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2125                                         DemandedElts, Depth + 1))
2126       return TLO.CombineTo(Op, AVG);
2127 
2128     break;
2129   }
2130   case ISD::FSHL:
2131   case ISD::FSHR: {
2132     SDValue Op0 = Op.getOperand(0);
2133     SDValue Op1 = Op.getOperand(1);
2134     SDValue Op2 = Op.getOperand(2);
2135     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2136 
2137     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2138       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2139 
2140       // For fshl, 0-shift returns the 1st arg.
2141       // For fshr, 0-shift returns the 2nd arg.
2142       if (Amt == 0) {
2143         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2144                                  Known, TLO, Depth + 1))
2145           return true;
2146         break;
2147       }
2148 
2149       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2150       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2151       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2152       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2153       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2154                                Depth + 1))
2155         return true;
2156       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2157                                Depth + 1))
2158         return true;
2159 
2160       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2161       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2162       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2163       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2164       Known = Known.unionWith(Known2);
2165 
2166       // Attempt to avoid multi-use ops if we don't need anything from them.
2167       if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2168           !DemandedElts.isAllOnes()) {
2169         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2170             Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2171         SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2172             Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2173         if (DemandedOp0 || DemandedOp1) {
2174           DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2175           DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2176           SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2177                                           DemandedOp1, Op2);
2178           return TLO.CombineTo(Op, NewOp);
2179         }
2180       }
2181     }
2182 
2183     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2184     if (isPowerOf2_32(BitWidth)) {
2185       APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2186       if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2187                                Known2, TLO, Depth + 1))
2188         return true;
2189     }
2190     break;
2191   }
2192   case ISD::ROTL:
2193   case ISD::ROTR: {
2194     SDValue Op0 = Op.getOperand(0);
2195     SDValue Op1 = Op.getOperand(1);
2196     bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2197 
2198     // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2199     if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2200       return TLO.CombineTo(Op, Op0);
2201 
2202     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2203       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2204       unsigned RevAmt = BitWidth - Amt;
2205 
2206       // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2207       // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2208       APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2209       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2210                                Depth + 1))
2211         return true;
2212 
2213       // rot*(x, 0) --> x
2214       if (Amt == 0)
2215         return TLO.CombineTo(Op, Op0);
2216 
2217       // See if we don't demand either half of the rotated bits.
2218       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2219           DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2220         Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2221         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2222       }
2223       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2224           DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2225         Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2226         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2227       }
2228     }
2229 
2230     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2231     if (isPowerOf2_32(BitWidth)) {
2232       APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2233       if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2234                                Depth + 1))
2235         return true;
2236     }
2237     break;
2238   }
2239   case ISD::SMIN:
2240   case ISD::SMAX:
2241   case ISD::UMIN:
2242   case ISD::UMAX: {
2243     unsigned Opc = Op.getOpcode();
2244     SDValue Op0 = Op.getOperand(0);
2245     SDValue Op1 = Op.getOperand(1);
2246 
2247     // If we're only demanding signbits, then we can simplify to OR/AND node.
2248     unsigned BitOp =
2249         (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2250     unsigned NumSignBits =
2251         std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2252                  TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2253     unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2254     if (NumSignBits >= NumDemandedUpperBits)
2255       return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2256 
2257     // Check if one arg is always less/greater than (or equal) to the other arg.
2258     KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2259     KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2260     switch (Opc) {
2261     case ISD::SMIN:
2262       if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2263         return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2264       if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2265         return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2266       Known = KnownBits::smin(Known0, Known1);
2267       break;
2268     case ISD::SMAX:
2269       if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2270         return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2271       if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2272         return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2273       Known = KnownBits::smax(Known0, Known1);
2274       break;
2275     case ISD::UMIN:
2276       if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2277         return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2278       if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2279         return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2280       Known = KnownBits::umin(Known0, Known1);
2281       break;
2282     case ISD::UMAX:
2283       if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2284         return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2285       if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2286         return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2287       Known = KnownBits::umax(Known0, Known1);
2288       break;
2289     }
2290     break;
2291   }
2292   case ISD::BITREVERSE: {
2293     SDValue Src = Op.getOperand(0);
2294     APInt DemandedSrcBits = DemandedBits.reverseBits();
2295     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2296                              Depth + 1))
2297       return true;
2298     Known.One = Known2.One.reverseBits();
2299     Known.Zero = Known2.Zero.reverseBits();
2300     break;
2301   }
2302   case ISD::BSWAP: {
2303     SDValue Src = Op.getOperand(0);
2304 
2305     // If the only bits demanded come from one byte of the bswap result,
2306     // just shift the input byte into position to eliminate the bswap.
2307     unsigned NLZ = DemandedBits.countl_zero();
2308     unsigned NTZ = DemandedBits.countr_zero();
2309 
2310     // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
2311     // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
2312     // have 14 leading zeros, round to 8.
2313     NLZ = alignDown(NLZ, 8);
2314     NTZ = alignDown(NTZ, 8);
2315     // If we need exactly one byte, we can do this transformation.
2316     if (BitWidth - NLZ - NTZ == 8) {
2317       // Replace this with either a left or right shift to get the byte into
2318       // the right place.
2319       unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2320       if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2321         unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2322         SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2323         SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2324         return TLO.CombineTo(Op, NewOp);
2325       }
2326     }
2327 
2328     APInt DemandedSrcBits = DemandedBits.byteSwap();
2329     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2330                              Depth + 1))
2331       return true;
2332     Known.One = Known2.One.byteSwap();
2333     Known.Zero = Known2.Zero.byteSwap();
2334     break;
2335   }
2336   case ISD::CTPOP: {
2337     // If only 1 bit is demanded, replace with PARITY as long as we're before
2338     // op legalization.
2339     // FIXME: Limit to scalars for now.
2340     if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2341       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2342                                                Op.getOperand(0)));
2343 
2344     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2345     break;
2346   }
2347   case ISD::SIGN_EXTEND_INREG: {
2348     SDValue Op0 = Op.getOperand(0);
2349     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2350     unsigned ExVTBits = ExVT.getScalarSizeInBits();
2351 
2352     // If we only care about the highest bit, don't bother shifting right.
2353     if (DemandedBits.isSignMask()) {
2354       unsigned MinSignedBits =
2355           TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2356       bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2357       // However if the input is already sign extended we expect the sign
2358       // extension to be dropped altogether later and do not simplify.
2359       if (!AlreadySignExtended) {
2360         // Compute the correct shift amount type, which must be getShiftAmountTy
2361         // for scalar types after legalization.
2362         SDValue ShiftAmt =
2363             TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2364         return TLO.CombineTo(Op,
2365                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2366       }
2367     }
2368 
2369     // If none of the extended bits are demanded, eliminate the sextinreg.
2370     if (DemandedBits.getActiveBits() <= ExVTBits)
2371       return TLO.CombineTo(Op, Op0);
2372 
2373     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2374 
2375     // Since the sign extended bits are demanded, we know that the sign
2376     // bit is demanded.
2377     InputDemandedBits.setBit(ExVTBits - 1);
2378 
2379     if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2380                              Depth + 1))
2381       return true;
2382 
2383     // If the sign bit of the input is known set or clear, then we know the
2384     // top bits of the result.
2385 
2386     // If the input sign bit is known zero, convert this into a zero extension.
2387     if (Known.Zero[ExVTBits - 1])
2388       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2389 
2390     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2391     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2392       Known.One.setBitsFrom(ExVTBits);
2393       Known.Zero &= Mask;
2394     } else { // Input sign bit unknown
2395       Known.Zero &= Mask;
2396       Known.One &= Mask;
2397     }
2398     break;
2399   }
2400   case ISD::BUILD_PAIR: {
2401     EVT HalfVT = Op.getOperand(0).getValueType();
2402     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2403 
2404     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2405     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2406 
2407     KnownBits KnownLo, KnownHi;
2408 
2409     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2410       return true;
2411 
2412     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2413       return true;
2414 
2415     Known = KnownHi.concat(KnownLo);
2416     break;
2417   }
2418   case ISD::ZERO_EXTEND_VECTOR_INREG:
2419     if (VT.isScalableVector())
2420       return false;
2421     [[fallthrough]];
2422   case ISD::ZERO_EXTEND: {
2423     SDValue Src = Op.getOperand(0);
2424     EVT SrcVT = Src.getValueType();
2425     unsigned InBits = SrcVT.getScalarSizeInBits();
2426     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2427     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2428 
2429     // If none of the top bits are demanded, convert this into an any_extend.
2430     if (DemandedBits.getActiveBits() <= InBits) {
2431       // If we only need the non-extended bits of the bottom element
2432       // then we can just bitcast to the result.
2433       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2434           VT.getSizeInBits() == SrcVT.getSizeInBits())
2435         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2436 
2437       unsigned Opc =
2438           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2439       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2440         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2441     }
2442 
2443     APInt InDemandedBits = DemandedBits.trunc(InBits);
2444     APInt InDemandedElts = DemandedElts.zext(InElts);
2445     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2446                              Depth + 1)) {
2447       Op->dropFlags(SDNodeFlags::NonNeg);
2448       return true;
2449     }
2450     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2451     Known = Known.zext(BitWidth);
2452 
2453     // Attempt to avoid multi-use ops if we don't need anything from them.
2454     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2455             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2456       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2457     break;
2458   }
2459   case ISD::SIGN_EXTEND_VECTOR_INREG:
2460     if (VT.isScalableVector())
2461       return false;
2462     [[fallthrough]];
2463   case ISD::SIGN_EXTEND: {
2464     SDValue Src = Op.getOperand(0);
2465     EVT SrcVT = Src.getValueType();
2466     unsigned InBits = SrcVT.getScalarSizeInBits();
2467     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2468     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2469 
2470     APInt InDemandedElts = DemandedElts.zext(InElts);
2471     APInt InDemandedBits = DemandedBits.trunc(InBits);
2472 
2473     // Since some of the sign extended bits are demanded, we know that the sign
2474     // bit is demanded.
2475     InDemandedBits.setBit(InBits - 1);
2476 
2477     // If none of the top bits are demanded, convert this into an any_extend.
2478     if (DemandedBits.getActiveBits() <= InBits) {
2479       // If we only need the non-extended bits of the bottom element
2480       // then we can just bitcast to the result.
2481       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2482           VT.getSizeInBits() == SrcVT.getSizeInBits())
2483         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2484 
2485       // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2486       if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2487           TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2488               InBits) {
2489         unsigned Opc =
2490             IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2491         if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2492           return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2493       }
2494     }
2495 
2496     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2497                              Depth + 1))
2498       return true;
2499     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2500 
2501     // If the sign bit is known one, the top bits match.
2502     Known = Known.sext(BitWidth);
2503 
2504     // If the sign bit is known zero, convert this to a zero extend.
2505     if (Known.isNonNegative()) {
2506       unsigned Opc =
2507           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2508       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2509         SDNodeFlags Flags;
2510         if (!IsVecInReg)
2511           Flags |= SDNodeFlags::NonNeg;
2512         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2513       }
2514     }
2515 
2516     // Attempt to avoid multi-use ops if we don't need anything from them.
2517     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2518             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2519       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2520     break;
2521   }
2522   case ISD::ANY_EXTEND_VECTOR_INREG:
2523     if (VT.isScalableVector())
2524       return false;
2525     [[fallthrough]];
2526   case ISD::ANY_EXTEND: {
2527     SDValue Src = Op.getOperand(0);
2528     EVT SrcVT = Src.getValueType();
2529     unsigned InBits = SrcVT.getScalarSizeInBits();
2530     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2531     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2532 
2533     // If we only need the bottom element then we can just bitcast.
2534     // TODO: Handle ANY_EXTEND?
2535     if (IsLE && IsVecInReg && DemandedElts == 1 &&
2536         VT.getSizeInBits() == SrcVT.getSizeInBits())
2537       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2538 
2539     APInt InDemandedBits = DemandedBits.trunc(InBits);
2540     APInt InDemandedElts = DemandedElts.zext(InElts);
2541     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2542                              Depth + 1))
2543       return true;
2544     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2545     Known = Known.anyext(BitWidth);
2546 
2547     // Attempt to avoid multi-use ops if we don't need anything from them.
2548     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2549             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2550       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2551     break;
2552   }
2553   case ISD::TRUNCATE: {
2554     SDValue Src = Op.getOperand(0);
2555 
2556     // Simplify the input, using demanded bit information, and compute the known
2557     // zero/one bits live out.
2558     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2559     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2560     if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2561                              Depth + 1))
2562       return true;
2563     Known = Known.trunc(BitWidth);
2564 
2565     // Attempt to avoid multi-use ops if we don't need anything from them.
2566     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2567             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2568       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2569 
2570     // If the input is only used by this truncate, see if we can shrink it based
2571     // on the known demanded bits.
2572     switch (Src.getOpcode()) {
2573     default:
2574       break;
2575     case ISD::SRL:
2576       // Shrink SRL by a constant if none of the high bits shifted in are
2577       // demanded.
2578       if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2579         // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2580         // undesirable.
2581         break;
2582 
2583       if (Src.getNode()->hasOneUse()) {
2584         if (isTruncateFree(Src, VT) &&
2585             !isTruncateFree(Src.getValueType(), VT)) {
2586           // If truncate is only free at trunc(srl), do not turn it into
2587           // srl(trunc). The check is done by first checking that the truncate
2588           // is free at Src's opcode (srl), then checking that the truncate is
2589           // not done by referencing a sub-register. In testing, if both
2590           // trunc(srl)'s and srl(trunc)'s trunc are free, srl(trunc) performs
2591           // better. If only trunc(srl)'s trunc is free, trunc(srl) is better.
2592           break;
2593         }
2594 
2595         std::optional<uint64_t> ShAmtC =
2596             TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2597         if (!ShAmtC || *ShAmtC >= BitWidth)
2598           break;
2599         uint64_t ShVal = *ShAmtC;
2600 
2601         APInt HighBits =
2602             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2603         HighBits.lshrInPlace(ShVal);
2604         HighBits = HighBits.trunc(BitWidth);
2605         if (!(HighBits & DemandedBits)) {
2606           // None of the shifted in bits are needed.  Add a truncate of the
2607           // shift input, then shift it.
2608           SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2609           SDValue NewTrunc =
2610               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2611           return TLO.CombineTo(
2612               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2613         }
2614       }
2615       break;
2616     }
2617 
2618     break;
2619   }
2620   case ISD::AssertZext: {
2621     // AssertZext demands all of the high bits, plus any of the low bits
2622     // demanded by its users.
2623     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2624     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2625     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2626                              TLO, Depth + 1))
2627       return true;
2628 
2629     Known.Zero |= ~InMask;
2630     Known.One &= (~Known.Zero);
2631     break;
2632   }
2633   case ISD::EXTRACT_VECTOR_ELT: {
2634     SDValue Src = Op.getOperand(0);
2635     SDValue Idx = Op.getOperand(1);
2636     ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2637     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2638 
2639     if (SrcEltCnt.isScalable())
2640       return false;
2641 
2642     // Demand the bits from every vector element without a constant index.
2643     unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2644     APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2645     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2646       if (CIdx->getAPIntValue().ult(NumSrcElts))
2647         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2648 
2649     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2650     // anything about the extended bits.
2651     APInt DemandedSrcBits = DemandedBits;
2652     if (BitWidth > EltBitWidth)
2653       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2654 
2655     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2656                              Depth + 1))
2657       return true;
2658 
2659     // Attempt to avoid multi-use ops if we don't need anything from them.
2660     if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2661       if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2662               Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2663         SDValue NewOp =
2664             TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2665         return TLO.CombineTo(Op, NewOp);
2666       }
2667     }
2668 
2669     Known = Known2;
2670     if (BitWidth > EltBitWidth)
2671       Known = Known.anyext(BitWidth);
2672     break;
2673   }
2674   case ISD::BITCAST: {
2675     if (VT.isScalableVector())
2676       return false;
2677     SDValue Src = Op.getOperand(0);
2678     EVT SrcVT = Src.getValueType();
2679     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2680 
2681     // If this is an FP->Int bitcast and if the sign bit is the only
2682     // thing demanded, turn this into a FGETSIGN.
2683     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2684         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2685         SrcVT.isFloatingPoint()) {
2686       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2687       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2688       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2689           SrcVT != MVT::f128) {
2690         // Cannot eliminate/lower SHL for f128 yet.
2691         EVT Ty = OpVTLegal ? VT : MVT::i32;
2692         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2693         // place.  We expect the SHL to be eliminated by other optimizations.
2694         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2695         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2696         if (!OpVTLegal && OpVTSizeInBits > 32)
2697           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2698         unsigned ShVal = Op.getValueSizeInBits() - 1;
2699         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2700         return TLO.CombineTo(Op,
2701                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2702       }
2703     }
2704 
2705     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2706     // Demand the elt/bit if any of the original elts/bits are demanded.
2707     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2708       unsigned Scale = BitWidth / NumSrcEltBits;
2709       unsigned NumSrcElts = SrcVT.getVectorNumElements();
2710       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2711       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2712       for (unsigned i = 0; i != Scale; ++i) {
2713         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2714         unsigned BitOffset = EltOffset * NumSrcEltBits;
2715         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2716         if (!Sub.isZero()) {
2717           DemandedSrcBits |= Sub;
2718           for (unsigned j = 0; j != NumElts; ++j)
2719             if (DemandedElts[j])
2720               DemandedSrcElts.setBit((j * Scale) + i);
2721         }
2722       }
2723 
2724       APInt KnownSrcUndef, KnownSrcZero;
2725       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2726                                      KnownSrcZero, TLO, Depth + 1))
2727         return true;
2728 
2729       KnownBits KnownSrcBits;
2730       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2731                                KnownSrcBits, TLO, Depth + 1))
2732         return true;
2733     } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2734       // TODO - bigendian once we have test coverage.
2735       unsigned Scale = NumSrcEltBits / BitWidth;
2736       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2737       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2738       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2739       for (unsigned i = 0; i != NumElts; ++i)
2740         if (DemandedElts[i]) {
2741           unsigned Offset = (i % Scale) * BitWidth;
2742           DemandedSrcBits.insertBits(DemandedBits, Offset);
2743           DemandedSrcElts.setBit(i / Scale);
2744         }
2745 
2746       if (SrcVT.isVector()) {
2747         APInt KnownSrcUndef, KnownSrcZero;
2748         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2749                                        KnownSrcZero, TLO, Depth + 1))
2750           return true;
2751       }
2752 
2753       KnownBits KnownSrcBits;
2754       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2755                                KnownSrcBits, TLO, Depth + 1))
2756         return true;
2757 
2758       // Attempt to avoid multi-use ops if we don't need anything from them.
2759       if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2760         if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2761                 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2762           SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2763           return TLO.CombineTo(Op, NewOp);
2764         }
2765       }
2766     }
2767 
2768     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2769     // recursive call where Known may be useful to the caller.
2770     if (Depth > 0) {
2771       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2772       return false;
2773     }
2774     break;
2775   }
2776   case ISD::MUL:
2777     if (DemandedBits.isPowerOf2()) {
2778       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2779       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2780       // odd (has LSB set), then the left-shifted low bit of X is the answer.
2781       unsigned CTZ = DemandedBits.countr_zero();
2782       ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2783       if (C && C->getAPIntValue().countr_zero() == CTZ) {
2784         SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2785         SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2786         return TLO.CombineTo(Op, Shl);
2787       }
2788     }
2789     // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2790     // X * X is odd iff X is odd.
2791     // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2792     if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2793       SDValue One = TLO.DAG.getConstant(1, dl, VT);
2794       SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2795       return TLO.CombineTo(Op, And1);
2796     }
2797     [[fallthrough]];
2798   case ISD::ADD:
2799   case ISD::SUB: {
2800     // Add, Sub, and Mul don't demand any bits in positions beyond that
2801     // of the highest bit demanded of them.
2802     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2803     SDNodeFlags Flags = Op.getNode()->getFlags();
2804     unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2805     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2806     KnownBits KnownOp0, KnownOp1;
2807     auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2808                                       const KnownBits &KnownRHS) {
2809       if (Op.getOpcode() == ISD::MUL)
2810         Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2811       return Demanded;
2812     };
2813     if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2814                              Depth + 1) ||
2815         SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2816                              DemandedElts, KnownOp0, TLO, Depth + 1) ||
2817         // See if the operation should be performed at a smaller bit width.
2818         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2819       // Disable the nsw and nuw flags. We can no longer guarantee that we
2820       // won't wrap after simplification.
2821       Op->dropFlags(SDNodeFlags::NoWrap);
2822       return true;
2823     }
2824 
2825     // neg x with only low bit demanded is simply x.
2826     if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2827         isNullConstant(Op0))
2828       return TLO.CombineTo(Op, Op1);
2829 
2830     // Attempt to avoid multi-use ops if we don't need anything from them.
2831     if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2832       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2833           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2834       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2835           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2836       if (DemandedOp0 || DemandedOp1) {
2837         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2838         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2839         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2840                                         Flags & ~SDNodeFlags::NoWrap);
2841         return TLO.CombineTo(Op, NewOp);
2842       }
2843     }
2844 
2845     // If we have a constant operand, we may be able to turn it into -1 if we
2846     // do not demand the high bits. This can make the constant smaller to
2847     // encode, allow more general folding, or match specialized instruction
2848     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2849     // is probably not useful (and could be detrimental).
2850     ConstantSDNode *C = isConstOrConstSplat(Op1);
2851     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2852     if (C && !C->isAllOnes() && !C->isOne() &&
2853         (C->getAPIntValue() | HighMask).isAllOnes()) {
2854       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2855       // Disable the nsw and nuw flags. We can no longer guarantee that we
2856       // won't wrap after simplification.
2857       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2858                                       Flags & ~SDNodeFlags::NoWrap);
2859       return TLO.CombineTo(Op, NewOp);
2860     }
2861 
2862     // Match a multiply with a disguised negated-power-of-2 and convert to a
2863     // an equivalent shift-left amount.
2864     // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2865     auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2866       if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2867         return 0;
2868 
2869       // Don't touch opaque constants. Also, ignore zero and power-of-2
2870       // multiplies. Those will get folded later.
2871       ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2872       if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2873           !MulC->getAPIntValue().isPowerOf2()) {
2874         APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2875         if (UnmaskedC.isNegatedPowerOf2())
2876           return (-UnmaskedC).logBase2();
2877       }
2878       return 0;
2879     };
2880 
2881     auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2882                        unsigned ShlAmt) {
2883       SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2884       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2885       SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2886       return TLO.CombineTo(Op, Res);
2887     };
2888 
2889     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2890       if (Op.getOpcode() == ISD::ADD) {
2891         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2892         if (unsigned ShAmt = getShiftLeftAmt(Op0))
2893           return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2894         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2895         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2896           return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2897       }
2898       if (Op.getOpcode() == ISD::SUB) {
2899         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2900         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2901           return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2902       }
2903     }
2904 
2905     if (Op.getOpcode() == ISD::MUL) {
2906       Known = KnownBits::mul(KnownOp0, KnownOp1);
2907     } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2908       Known = KnownBits::computeForAddSub(
2909           Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2910           Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2911     }
2912     break;
2913   }
2914   default:
2915     // We also ask the target about intrinsics (which could be specific to it).
2916     if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2917         Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2918       // TODO: Probably okay to remove after audit; here to reduce change size
2919       // in initial enablement patch for scalable vectors
2920       if (Op.getValueType().isScalableVector())
2921         break;
2922       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2923                                             Known, TLO, Depth))
2924         return true;
2925       break;
2926     }
2927 
2928     // Just use computeKnownBits to compute output bits.
2929     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2930     break;
2931   }
2932 
2933   // If we know the value of all of the demanded bits, return this as a
2934   // constant.
2935   if (!isTargetCanonicalConstantNode(Op) &&
2936       DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2937     // Avoid folding to a constant if any OpaqueConstant is involved.
2938     const SDNode *N = Op.getNode();
2939     for (SDNode *Op :
2940          llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2941       if (auto *C = dyn_cast<ConstantSDNode>(Op))
2942         if (C->isOpaque())
2943           return false;
2944     }
2945     if (VT.isInteger())
2946       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2947     if (VT.isFloatingPoint())
2948       return TLO.CombineTo(
2949           Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2950                                     dl, VT));
2951   }
2952 
2953   // A multi use 'all demanded elts' simplify failed to find any knownbits.
2954   // Try again just for the original demanded elts.
2955   // Ensure we do this AFTER constant folding above.
2956   if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2957     Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2958 
2959   return false;
2960 }
2961 
2962 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2963                                                 const APInt &DemandedElts,
2964                                                 DAGCombinerInfo &DCI) const {
2965   SelectionDAG &DAG = DCI.DAG;
2966   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2967                         !DCI.isBeforeLegalizeOps());
2968 
2969   APInt KnownUndef, KnownZero;
2970   bool Simplified =
2971       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2972   if (Simplified) {
2973     DCI.AddToWorklist(Op.getNode());
2974     DCI.CommitTargetLoweringOpt(TLO);
2975   }
2976 
2977   return Simplified;
2978 }
2979 
2980 /// Given a vector binary operation and known undefined elements for each input
2981 /// operand, compute whether each element of the output is undefined.
2982 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2983                                          const APInt &UndefOp0,
2984                                          const APInt &UndefOp1) {
2985   EVT VT = BO.getValueType();
2986   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2987          "Vector binop only");
2988 
2989   EVT EltVT = VT.getVectorElementType();
2990   unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2991   assert(UndefOp0.getBitWidth() == NumElts &&
2992          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2993 
2994   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2995                                    const APInt &UndefVals) {
2996     if (UndefVals[Index])
2997       return DAG.getUNDEF(EltVT);
2998 
2999     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3000       // Try hard to make sure that the getNode() call is not creating temporary
3001       // nodes. Ignore opaque integers because they do not constant fold.
3002       SDValue Elt = BV->getOperand(Index);
3003       auto *C = dyn_cast<ConstantSDNode>(Elt);
3004       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3005         return Elt;
3006     }
3007 
3008     return SDValue();
3009   };
3010 
3011   APInt KnownUndef = APInt::getZero(NumElts);
3012   for (unsigned i = 0; i != NumElts; ++i) {
3013     // If both inputs for this element are either constant or undef and match
3014     // the element type, compute the constant/undef result for this element of
3015     // the vector.
3016     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3017     // not handle FP constants. The code within getNode() should be refactored
3018     // to avoid the danger of creating a bogus temporary node here.
3019     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3020     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3021     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3022       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3023         KnownUndef.setBit(i);
3024   }
3025   return KnownUndef;
3026 }
3027 
3028 bool TargetLowering::SimplifyDemandedVectorElts(
3029     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3030     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3031     bool AssumeSingleUse) const {
3032   EVT VT = Op.getValueType();
3033   unsigned Opcode = Op.getOpcode();
3034   APInt DemandedElts = OriginalDemandedElts;
3035   unsigned NumElts = DemandedElts.getBitWidth();
3036   assert(VT.isVector() && "Expected vector op");
3037 
3038   KnownUndef = KnownZero = APInt::getZero(NumElts);
3039 
3040   if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3041     return false;
3042 
3043   // TODO: For now we assume we know nothing about scalable vectors.
3044   if (VT.isScalableVector())
3045     return false;
3046 
3047   assert(VT.getVectorNumElements() == NumElts &&
3048          "Mask size mismatches value type element count!");
3049 
3050   // Undef operand.
3051   if (Op.isUndef()) {
3052     KnownUndef.setAllBits();
3053     return false;
3054   }
3055 
3056   // If Op has other users, assume that all elements are needed.
3057   if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3058     DemandedElts.setAllBits();
3059 
3060   // Not demanding any elements from Op.
3061   if (DemandedElts == 0) {
3062     KnownUndef.setAllBits();
3063     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3064   }
3065 
3066   // Limit search depth.
3067   if (Depth >= SelectionDAG::MaxRecursionDepth)
3068     return false;
3069 
3070   SDLoc DL(Op);
3071   unsigned EltSizeInBits = VT.getScalarSizeInBits();
3072   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3073 
3074   // Helper for demanding the specified elements and all the bits of both binary
3075   // operands.
3076   auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3077     SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3078                                                            TLO.DAG, Depth + 1);
3079     SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3080                                                            TLO.DAG, Depth + 1);
3081     if (NewOp0 || NewOp1) {
3082       SDValue NewOp =
3083           TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3084                           NewOp1 ? NewOp1 : Op1, Op->getFlags());
3085       return TLO.CombineTo(Op, NewOp);
3086     }
3087     return false;
3088   };
3089 
3090   switch (Opcode) {
3091   case ISD::SCALAR_TO_VECTOR: {
3092     if (!DemandedElts[0]) {
3093       KnownUndef.setAllBits();
3094       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3095     }
3096     SDValue ScalarSrc = Op.getOperand(0);
3097     if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3098       SDValue Src = ScalarSrc.getOperand(0);
3099       SDValue Idx = ScalarSrc.getOperand(1);
3100       EVT SrcVT = Src.getValueType();
3101 
3102       ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3103 
3104       if (SrcEltCnt.isScalable())
3105         return false;
3106 
3107       unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3108       if (isNullConstant(Idx)) {
3109         APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3110         APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3111         APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3112         if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3113                                        TLO, Depth + 1))
3114           return true;
3115       }
3116     }
3117     KnownUndef.setHighBits(NumElts - 1);
3118     break;
3119   }
3120   case ISD::BITCAST: {
3121     SDValue Src = Op.getOperand(0);
3122     EVT SrcVT = Src.getValueType();
3123 
3124     // We only handle vectors here.
3125     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3126     if (!SrcVT.isVector())
3127       break;
3128 
3129     // Fast handling of 'identity' bitcasts.
3130     unsigned NumSrcElts = SrcVT.getVectorNumElements();
3131     if (NumSrcElts == NumElts)
3132       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3133                                         KnownZero, TLO, Depth + 1);
3134 
3135     APInt SrcDemandedElts, SrcZero, SrcUndef;
3136 
3137     // Bitcast from 'large element' src vector to 'small element' vector, we
3138     // must demand a source element if any DemandedElt maps to it.
3139     if ((NumElts % NumSrcElts) == 0) {
3140       unsigned Scale = NumElts / NumSrcElts;
3141       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3142       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3143                                      TLO, Depth + 1))
3144         return true;
3145 
3146       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3147       // of the large element.
3148       // TODO - bigendian once we have test coverage.
3149       if (IsLE) {
3150         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3151         APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3152         for (unsigned i = 0; i != NumElts; ++i)
3153           if (DemandedElts[i]) {
3154             unsigned Ofs = (i % Scale) * EltSizeInBits;
3155             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3156           }
3157 
3158         KnownBits Known;
3159         if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3160                                  TLO, Depth + 1))
3161           return true;
3162 
3163         // The bitcast has split each wide element into a number of
3164         // narrow subelements. We have just computed the Known bits
3165         // for wide elements. See if element splitting results in
3166         // some subelements being zero. Only for demanded elements!
3167         for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3168           if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3169                    .isAllOnes())
3170             continue;
3171           for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3172             unsigned Elt = Scale * SrcElt + SubElt;
3173             if (DemandedElts[Elt])
3174               KnownZero.setBit(Elt);
3175           }
3176         }
3177       }
3178 
3179       // If the src element is zero/undef then all the output elements will be -
3180       // only demanded elements are guaranteed to be correct.
3181       for (unsigned i = 0; i != NumSrcElts; ++i) {
3182         if (SrcDemandedElts[i]) {
3183           if (SrcZero[i])
3184             KnownZero.setBits(i * Scale, (i + 1) * Scale);
3185           if (SrcUndef[i])
3186             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3187         }
3188       }
3189     }
3190 
3191     // Bitcast from 'small element' src vector to 'large element' vector, we
3192     // demand all smaller source elements covered by the larger demanded element
3193     // of this vector.
3194     if ((NumSrcElts % NumElts) == 0) {
3195       unsigned Scale = NumSrcElts / NumElts;
3196       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3197       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3198                                      TLO, Depth + 1))
3199         return true;
3200 
3201       // If all the src elements covering an output element are zero/undef, then
3202       // the output element will be as well, assuming it was demanded.
3203       for (unsigned i = 0; i != NumElts; ++i) {
3204         if (DemandedElts[i]) {
3205           if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3206             KnownZero.setBit(i);
3207           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3208             KnownUndef.setBit(i);
3209         }
3210       }
3211     }
3212     break;
3213   }
3214   case ISD::FREEZE: {
3215     SDValue N0 = Op.getOperand(0);
3216     if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3217                                                  /*PoisonOnly=*/false))
3218       return TLO.CombineTo(Op, N0);
3219 
3220     // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3221     // freeze(op(x, ...)) -> op(freeze(x), ...).
3222     if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3223       return TLO.CombineTo(
3224           Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3225                               TLO.DAG.getFreeze(N0.getOperand(0))));
3226     break;
3227   }
3228   case ISD::BUILD_VECTOR: {
3229     // Check all elements and simplify any unused elements with UNDEF.
3230     if (!DemandedElts.isAllOnes()) {
3231       // Don't simplify BROADCASTS.
3232       if (llvm::any_of(Op->op_values(),
3233                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3234         SmallVector<SDValue, 32> Ops(Op->ops());
3235         bool Updated = false;
3236         for (unsigned i = 0; i != NumElts; ++i) {
3237           if (!DemandedElts[i] && !Ops[i].isUndef()) {
3238             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3239             KnownUndef.setBit(i);
3240             Updated = true;
3241           }
3242         }
3243         if (Updated)
3244           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3245       }
3246     }
3247     for (unsigned i = 0; i != NumElts; ++i) {
3248       SDValue SrcOp = Op.getOperand(i);
3249       if (SrcOp.isUndef()) {
3250         KnownUndef.setBit(i);
3251       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3252                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3253         KnownZero.setBit(i);
3254       }
3255     }
3256     break;
3257   }
3258   case ISD::CONCAT_VECTORS: {
3259     EVT SubVT = Op.getOperand(0).getValueType();
3260     unsigned NumSubVecs = Op.getNumOperands();
3261     unsigned NumSubElts = SubVT.getVectorNumElements();
3262     for (unsigned i = 0; i != NumSubVecs; ++i) {
3263       SDValue SubOp = Op.getOperand(i);
3264       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3265       APInt SubUndef, SubZero;
3266       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3267                                      Depth + 1))
3268         return true;
3269       KnownUndef.insertBits(SubUndef, i * NumSubElts);
3270       KnownZero.insertBits(SubZero, i * NumSubElts);
3271     }
3272 
3273     // Attempt to avoid multi-use ops if we don't need anything from them.
3274     if (!DemandedElts.isAllOnes()) {
3275       bool FoundNewSub = false;
3276       SmallVector<SDValue, 2> DemandedSubOps;
3277       for (unsigned i = 0; i != NumSubVecs; ++i) {
3278         SDValue SubOp = Op.getOperand(i);
3279         APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3280         SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3281             SubOp, SubElts, TLO.DAG, Depth + 1);
3282         DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3283         FoundNewSub = NewSubOp ? true : FoundNewSub;
3284       }
3285       if (FoundNewSub) {
3286         SDValue NewOp =
3287             TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3288         return TLO.CombineTo(Op, NewOp);
3289       }
3290     }
3291     break;
3292   }
3293   case ISD::INSERT_SUBVECTOR: {
3294     // Demand any elements from the subvector and the remainder from the src its
3295     // inserted into.
3296     SDValue Src = Op.getOperand(0);
3297     SDValue Sub = Op.getOperand(1);
3298     uint64_t Idx = Op.getConstantOperandVal(2);
3299     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3300     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3301     APInt DemandedSrcElts = DemandedElts;
3302     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3303 
3304     APInt SubUndef, SubZero;
3305     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3306                                    Depth + 1))
3307       return true;
3308 
3309     // If none of the src operand elements are demanded, replace it with undef.
3310     if (!DemandedSrcElts && !Src.isUndef())
3311       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3312                                                TLO.DAG.getUNDEF(VT), Sub,
3313                                                Op.getOperand(2)));
3314 
3315     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3316                                    TLO, Depth + 1))
3317       return true;
3318     KnownUndef.insertBits(SubUndef, Idx);
3319     KnownZero.insertBits(SubZero, Idx);
3320 
3321     // Attempt to avoid multi-use ops if we don't need anything from them.
3322     if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3323       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3324           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3325       SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3326           Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3327       if (NewSrc || NewSub) {
3328         NewSrc = NewSrc ? NewSrc : Src;
3329         NewSub = NewSub ? NewSub : Sub;
3330         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3331                                         NewSub, Op.getOperand(2));
3332         return TLO.CombineTo(Op, NewOp);
3333       }
3334     }
3335     break;
3336   }
3337   case ISD::EXTRACT_SUBVECTOR: {
3338     // Offset the demanded elts by the subvector index.
3339     SDValue Src = Op.getOperand(0);
3340     if (Src.getValueType().isScalableVector())
3341       break;
3342     uint64_t Idx = Op.getConstantOperandVal(1);
3343     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3344     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3345 
3346     APInt SrcUndef, SrcZero;
3347     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3348                                    Depth + 1))
3349       return true;
3350     KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3351     KnownZero = SrcZero.extractBits(NumElts, Idx);
3352 
3353     // Attempt to avoid multi-use ops if we don't need anything from them.
3354     if (!DemandedElts.isAllOnes()) {
3355       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3356           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3357       if (NewSrc) {
3358         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3359                                         Op.getOperand(1));
3360         return TLO.CombineTo(Op, NewOp);
3361       }
3362     }
3363     break;
3364   }
3365   case ISD::INSERT_VECTOR_ELT: {
3366     SDValue Vec = Op.getOperand(0);
3367     SDValue Scl = Op.getOperand(1);
3368     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3369 
3370     // For a legal, constant insertion index, if we don't need this insertion
3371     // then strip it, else remove it from the demanded elts.
3372     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3373       unsigned Idx = CIdx->getZExtValue();
3374       if (!DemandedElts[Idx])
3375         return TLO.CombineTo(Op, Vec);
3376 
3377       APInt DemandedVecElts(DemandedElts);
3378       DemandedVecElts.clearBit(Idx);
3379       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3380                                      KnownZero, TLO, Depth + 1))
3381         return true;
3382 
3383       KnownUndef.setBitVal(Idx, Scl.isUndef());
3384 
3385       KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3386       break;
3387     }
3388 
3389     APInt VecUndef, VecZero;
3390     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3391                                    Depth + 1))
3392       return true;
3393     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3394     break;
3395   }
3396   case ISD::VSELECT: {
3397     SDValue Sel = Op.getOperand(0);
3398     SDValue LHS = Op.getOperand(1);
3399     SDValue RHS = Op.getOperand(2);
3400 
3401     // Try to transform the select condition based on the current demanded
3402     // elements.
3403     APInt UndefSel, ZeroSel;
3404     if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3405                                    Depth + 1))
3406       return true;
3407 
3408     // See if we can simplify either vselect operand.
3409     APInt DemandedLHS(DemandedElts);
3410     APInt DemandedRHS(DemandedElts);
3411     APInt UndefLHS, ZeroLHS;
3412     APInt UndefRHS, ZeroRHS;
3413     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3414                                    Depth + 1))
3415       return true;
3416     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3417                                    Depth + 1))
3418       return true;
3419 
3420     KnownUndef = UndefLHS & UndefRHS;
3421     KnownZero = ZeroLHS & ZeroRHS;
3422 
3423     // If we know that the selected element is always zero, we don't need the
3424     // select value element.
3425     APInt DemandedSel = DemandedElts & ~KnownZero;
3426     if (DemandedSel != DemandedElts)
3427       if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3428                                      Depth + 1))
3429         return true;
3430 
3431     break;
3432   }
3433   case ISD::VECTOR_SHUFFLE: {
3434     SDValue LHS = Op.getOperand(0);
3435     SDValue RHS = Op.getOperand(1);
3436     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3437 
3438     // Collect demanded elements from shuffle operands..
3439     APInt DemandedLHS(NumElts, 0);
3440     APInt DemandedRHS(NumElts, 0);
3441     for (unsigned i = 0; i != NumElts; ++i) {
3442       int M = ShuffleMask[i];
3443       if (M < 0 || !DemandedElts[i])
3444         continue;
3445       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3446       if (M < (int)NumElts)
3447         DemandedLHS.setBit(M);
3448       else
3449         DemandedRHS.setBit(M - NumElts);
3450     }
3451 
3452     // See if we can simplify either shuffle operand.
3453     APInt UndefLHS, ZeroLHS;
3454     APInt UndefRHS, ZeroRHS;
3455     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3456                                    Depth + 1))
3457       return true;
3458     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3459                                    Depth + 1))
3460       return true;
3461 
3462     // Simplify mask using undef elements from LHS/RHS.
3463     bool Updated = false;
3464     bool IdentityLHS = true, IdentityRHS = true;
3465     SmallVector<int, 32> NewMask(ShuffleMask);
3466     for (unsigned i = 0; i != NumElts; ++i) {
3467       int &M = NewMask[i];
3468       if (M < 0)
3469         continue;
3470       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3471           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3472         Updated = true;
3473         M = -1;
3474       }
3475       IdentityLHS &= (M < 0) || (M == (int)i);
3476       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3477     }
3478 
3479     // Update legal shuffle masks based on demanded elements if it won't reduce
3480     // to Identity which can cause premature removal of the shuffle mask.
3481     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3482       SDValue LegalShuffle =
3483           buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3484       if (LegalShuffle)
3485         return TLO.CombineTo(Op, LegalShuffle);
3486     }
3487 
3488     // Propagate undef/zero elements from LHS/RHS.
3489     for (unsigned i = 0; i != NumElts; ++i) {
3490       int M = ShuffleMask[i];
3491       if (M < 0) {
3492         KnownUndef.setBit(i);
3493       } else if (M < (int)NumElts) {
3494         if (UndefLHS[M])
3495           KnownUndef.setBit(i);
3496         if (ZeroLHS[M])
3497           KnownZero.setBit(i);
3498       } else {
3499         if (UndefRHS[M - NumElts])
3500           KnownUndef.setBit(i);
3501         if (ZeroRHS[M - NumElts])
3502           KnownZero.setBit(i);
3503       }
3504     }
3505     break;
3506   }
3507   case ISD::ANY_EXTEND_VECTOR_INREG:
3508   case ISD::SIGN_EXTEND_VECTOR_INREG:
3509   case ISD::ZERO_EXTEND_VECTOR_INREG: {
3510     APInt SrcUndef, SrcZero;
3511     SDValue Src = Op.getOperand(0);
3512     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3513     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3514     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3515                                    Depth + 1))
3516       return true;
3517     KnownZero = SrcZero.zextOrTrunc(NumElts);
3518     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3519 
3520     if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3521         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3522         DemandedSrcElts == 1) {
3523       // aext - if we just need the bottom element then we can bitcast.
3524       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3525     }
3526 
3527     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3528       // zext(undef) upper bits are guaranteed to be zero.
3529       if (DemandedElts.isSubsetOf(KnownUndef))
3530         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3531       KnownUndef.clearAllBits();
3532 
3533       // zext - if we just need the bottom element then we can mask:
3534       // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3535       if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3536           Op->isOnlyUserOf(Src.getNode()) &&
3537           Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3538         SDLoc DL(Op);
3539         EVT SrcVT = Src.getValueType();
3540         EVT SrcSVT = SrcVT.getScalarType();
3541         SmallVector<SDValue> MaskElts;
3542         MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3543         MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3544         SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3545         if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3546                 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3547           Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3548           return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3549         }
3550       }
3551     }
3552     break;
3553   }
3554 
3555   // TODO: There are more binop opcodes that could be handled here - MIN,
3556   // MAX, saturated math, etc.
3557   case ISD::ADD: {
3558     SDValue Op0 = Op.getOperand(0);
3559     SDValue Op1 = Op.getOperand(1);
3560     if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3561       APInt UndefLHS, ZeroLHS;
3562       if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3563                                      Depth + 1, /*AssumeSingleUse*/ true))
3564         return true;
3565     }
3566     [[fallthrough]];
3567   }
3568   case ISD::AVGCEILS:
3569   case ISD::AVGCEILU:
3570   case ISD::AVGFLOORS:
3571   case ISD::AVGFLOORU:
3572   case ISD::OR:
3573   case ISD::XOR:
3574   case ISD::SUB:
3575   case ISD::FADD:
3576   case ISD::FSUB:
3577   case ISD::FMUL:
3578   case ISD::FDIV:
3579   case ISD::FREM: {
3580     SDValue Op0 = Op.getOperand(0);
3581     SDValue Op1 = Op.getOperand(1);
3582 
3583     APInt UndefRHS, ZeroRHS;
3584     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3585                                    Depth + 1))
3586       return true;
3587     APInt UndefLHS, ZeroLHS;
3588     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3589                                    Depth + 1))
3590       return true;
3591 
3592     KnownZero = ZeroLHS & ZeroRHS;
3593     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3594 
3595     // Attempt to avoid multi-use ops if we don't need anything from them.
3596     // TODO - use KnownUndef to relax the demandedelts?
3597     if (!DemandedElts.isAllOnes())
3598       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3599         return true;
3600     break;
3601   }
3602   case ISD::SHL:
3603   case ISD::SRL:
3604   case ISD::SRA:
3605   case ISD::ROTL:
3606   case ISD::ROTR: {
3607     SDValue Op0 = Op.getOperand(0);
3608     SDValue Op1 = Op.getOperand(1);
3609 
3610     APInt UndefRHS, ZeroRHS;
3611     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3612                                    Depth + 1))
3613       return true;
3614     APInt UndefLHS, ZeroLHS;
3615     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3616                                    Depth + 1))
3617       return true;
3618 
3619     KnownZero = ZeroLHS;
3620     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3621 
3622     // Attempt to avoid multi-use ops if we don't need anything from them.
3623     // TODO - use KnownUndef to relax the demandedelts?
3624     if (!DemandedElts.isAllOnes())
3625       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3626         return true;
3627     break;
3628   }
3629   case ISD::MUL:
3630   case ISD::MULHU:
3631   case ISD::MULHS:
3632   case ISD::AND: {
3633     SDValue Op0 = Op.getOperand(0);
3634     SDValue Op1 = Op.getOperand(1);
3635 
3636     APInt SrcUndef, SrcZero;
3637     if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3638                                    Depth + 1))
3639       return true;
3640     // If we know that a demanded element was zero in Op1 we don't need to
3641     // demand it in Op0 - its guaranteed to be zero.
3642     APInt DemandedElts0 = DemandedElts & ~SrcZero;
3643     if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3644                                    TLO, Depth + 1))
3645       return true;
3646 
3647     KnownUndef &= DemandedElts0;
3648     KnownZero &= DemandedElts0;
3649 
3650     // If every element pair has a zero/undef then just fold to zero.
3651     // fold (and x, undef) -> 0  /  (and x, 0) -> 0
3652     // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
3653     if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3654       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3655 
3656     // If either side has a zero element, then the result element is zero, even
3657     // if the other is an UNDEF.
3658     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3659     // and then handle 'and' nodes with the rest of the binop opcodes.
3660     KnownZero |= SrcZero;
3661     KnownUndef &= SrcUndef;
3662     KnownUndef &= ~KnownZero;
3663 
3664     // Attempt to avoid multi-use ops if we don't need anything from them.
3665     if (!DemandedElts.isAllOnes())
3666       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3667         return true;
3668     break;
3669   }
3670   case ISD::TRUNCATE:
3671   case ISD::SIGN_EXTEND:
3672   case ISD::ZERO_EXTEND:
3673     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3674                                    KnownZero, TLO, Depth + 1))
3675       return true;
3676 
3677     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3678       // zext(undef) upper bits are guaranteed to be zero.
3679       if (DemandedElts.isSubsetOf(KnownUndef))
3680         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3681       KnownUndef.clearAllBits();
3682     }
3683     break;
3684   default: {
3685     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3686       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3687                                                   KnownZero, TLO, Depth))
3688         return true;
3689     } else {
3690       KnownBits Known;
3691       APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3692       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3693                                TLO, Depth, AssumeSingleUse))
3694         return true;
3695     }
3696     break;
3697   }
3698   }
3699   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3700 
3701   // Constant fold all undef cases.
3702   // TODO: Handle zero cases as well.
3703   if (DemandedElts.isSubsetOf(KnownUndef))
3704     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3705 
3706   return false;
3707 }
3708 
3709 /// Determine which of the bits specified in Mask are known to be either zero or
3710 /// one and return them in the Known.
3711 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3712                                                    KnownBits &Known,
3713                                                    const APInt &DemandedElts,
3714                                                    const SelectionDAG &DAG,
3715                                                    unsigned Depth) const {
3716   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3717           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3718           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3719           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3720          "Should use MaskedValueIsZero if you don't know whether Op"
3721          " is a target node!");
3722   Known.resetAll();
3723 }
3724 
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default for GlobalISel: no bits of a target instruction's
  // result are known. Targets override this hook to do better.
  Known.resetAll();
}
3731 
void TargetLowering::computeKnownBitsForFrameIndex(
  const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
  // The low bits are known zero if the pointer is aligned: an object with
  // alignment 2^N has its N lowest address bits clear.
  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
}
3737 
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  // Conservative default: claim only byte alignment. Targets override this
  // hook to report stronger alignment guarantees for their instructions.
  return Align(1);
}
3743 
3744 /// This method can be implemented by targets that want to expose additional
3745 /// information about sign bits to the DAG Combiner.
3746 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3747                                                          const APInt &,
3748                                                          const SelectionDAG &,
3749                                                          unsigned Depth) const {
3750   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3751           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3752           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3753           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3754          "Should use ComputeNumSignBits if you don't know whether Op"
3755          " is a target node!");
3756   return 1;
3757 }
3758 
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  // Conservative default for GlobalISel: only the sign bit itself is known.
  return 1;
}
3764 
3765 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3766     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3767     TargetLoweringOpt &TLO, unsigned Depth) const {
3768   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3769           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3770           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3771           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3772          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3773          " is a target node!");
3774   return false;
3775 }
3776 
3777 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3778     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3779     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3780   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3781           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3782           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3783           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3784          "Should use SimplifyDemandedBits if you don't know whether Op"
3785          " is a target node!");
3786   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3787   return false;
3788 }
3789 
3790 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3791     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3792     SelectionDAG &DAG, unsigned Depth) const {
3793   assert(
3794       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3795        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3796        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3797        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3798       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3799       " is a target node!");
3800   return SDValue();
3801 }
3802 
3803 SDValue
3804 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3805                                         SDValue N1, MutableArrayRef<int> Mask,
3806                                         SelectionDAG &DAG) const {
3807   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3808   if (!LegalMask) {
3809     std::swap(N0, N1);
3810     ShuffleVectorSDNode::commuteMask(Mask);
3811     LegalMask = isShuffleMaskLegal(Mask, VT);
3812   }
3813 
3814   if (!LegalMask)
3815     return SDValue();
3816 
3817   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3818 }
3819 
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  // Default: the target does not recognize this load as materializing a
  // constant. Targets with constant pools in special address spaces override.
  return nullptr;
}
3823 
3824 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3825     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3826     bool PoisonOnly, unsigned Depth) const {
3827   assert(
3828       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3829        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3830        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3831        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3832       "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3833       " is a target node!");
3834 
3835   // If Op can't create undef/poison and none of its operands are undef/poison
3836   // then Op is never undef/poison.
3837   return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3838                                               /*ConsiderFlags*/ true, Depth) &&
3839          all_of(Op->ops(), [&](SDValue V) {
3840            return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3841                                                        Depth + 1);
3842          });
3843 }
3844 
3845 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3846     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3847     bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3848   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3849           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3850           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3851           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3852          "Should use canCreateUndefOrPoison if you don't know whether Op"
3853          " is a target node!");
3854   // Be conservative and return true.
3855   return true;
3856 }
3857 
3858 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3859                                                   const SelectionDAG &DAG,
3860                                                   bool SNaN,
3861                                                   unsigned Depth) const {
3862   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3863           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3864           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3865           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3866          "Should use isKnownNeverNaN if you don't know whether Op"
3867          " is a target node!");
3868   return false;
3869 }
3870 
3871 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3872                                                const APInt &DemandedElts,
3873                                                APInt &UndefElts,
3874                                                const SelectionDAG &DAG,
3875                                                unsigned Depth) const {
3876   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3877           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3878           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3879           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3880          "Should use isSplatValue if you don't know whether Op"
3881          " is a target node!");
3882   return false;
3883 }
3884 
3885 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3886 // work with truncating build vectors and vectors with elements of less than
3887 // 8 bits.
3888 bool TargetLowering::isConstTrueVal(SDValue N) const {
3889   if (!N)
3890     return false;
3891 
3892   unsigned EltWidth;
3893   APInt CVal;
3894   if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3895                                                /*AllowTruncation=*/true)) {
3896     CVal = CN->getAPIntValue();
3897     EltWidth = N.getValueType().getScalarSizeInBits();
3898   } else
3899     return false;
3900 
3901   // If this is a truncating splat, truncate the splat value.
3902   // Otherwise, we may fail to match the expected values below.
3903   if (EltWidth < CVal.getBitWidth())
3904     CVal = CVal.trunc(EltWidth);
3905 
3906   switch (getBooleanContents(N.getValueType())) {
3907   case UndefinedBooleanContent:
3908     return CVal[0];
3909   case ZeroOrOneBooleanContent:
3910     return CVal.isOne();
3911   case ZeroOrNegativeOneBooleanContent:
3912     return CVal.isAllOnes();
3913   }
3914 
3915   llvm_unreachable("Invalid boolean contents");
3916 }
3917 
3918 bool TargetLowering::isConstFalseVal(SDValue N) const {
3919   if (!N)
3920     return false;
3921 
3922   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3923   if (!CN) {
3924     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3925     if (!BV)
3926       return false;
3927 
3928     // Only interested in constant splats, we don't care about undef
3929     // elements in identifying boolean constants and getConstantSplatNode
3930     // returns NULL if all ops are undef;
3931     CN = BV->getConstantSplatNode();
3932     if (!CN)
3933       return false;
3934   }
3935 
3936   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3937     return !CN->getAPIntValue()[0];
3938 
3939   return CN->isZero();
3940 }
3941 
3942 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3943                                        bool SExt) const {
3944   if (VT == MVT::i1)
3945     return N->isOne();
3946 
3947   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3948   switch (Cnt) {
3949   case TargetLowering::ZeroOrOneBooleanContent:
3950     // An extended value of 1 is always true, unless its original type is i1,
3951     // in which case it will be sign extended to -1.
3952     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3953   case TargetLowering::UndefinedBooleanContent:
3954   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3955     return N->isAllOnes() && SExt;
3956   }
3957   llvm_unreachable("Unexpected enumeration.");
3958 }
3959 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize: if exactly one operand is an AND, make it N0.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only integer [in]equality comparisons against an AND are handled here.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    // The narrow type is just wide enough to contain the mask bit, so the
    // masked bit becomes the sign bit after truncation.
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // it's liable to create an infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
4054 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Reduce the four unsigned comparisons to eq/ne by canonicalizing the
  // constant: u<= C becomes u< (C+1), and u> C becomes u>= (C+1).
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into:  sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      ISD::SIGN_EXTEND_INREG, DL, XVT, X,
      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
}
4146 
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  // Set by the Match lambda below: the hoisted shift's opcode, the AND's
  // non-shift operand, the shifted constant, and the shift amount.
  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'.
  // NOTE: X must already be assigned before calling; the lambda reads it when
  // consulting the target hook.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    // Let the target decide whether hoisting the constant out of the shift
    // is profitable here.
    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
4215 
4216 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4217 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4218 /// handle the commuted versions of these patterns.
4219 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4220                                            ISD::CondCode Cond, const SDLoc &DL,
4221                                            DAGCombinerInfo &DCI) const {
4222   unsigned BOpcode = N0.getOpcode();
4223   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4224          "Unexpected binop");
4225   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4226 
4227   // (X + Y) == X --> Y == 0
4228   // (X - Y) == X --> Y == 0
4229   // (X ^ Y) == X --> Y == 0
4230   SelectionDAG &DAG = DCI.DAG;
4231   EVT OpVT = N0.getValueType();
4232   SDValue X = N0.getOperand(0);
4233   SDValue Y = N0.getOperand(1);
4234   if (X == N1)
4235     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4236 
4237   if (Y != N1)
4238     return SDValue();
4239 
4240   // (X + Y) == Y --> X == 0
4241   // (X ^ Y) == Y --> X == 0
4242   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4243     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4244 
4245   // The shift would not be valid if the operands are boolean (i1).
4246   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4247     return SDValue();
4248 
4249   // (X - Y) == Y --> X == Y << 1
4250   SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4251   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4252   if (!DCI.isCalledByLegalizer())
4253     DCI.AddToWorklist(YShl1.getNode());
4254   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4255 }
4256 
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  // The truncated type must still be wide enough to hold the maximum possible
  // popcount of the wider source (log2 of its bit width).
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
      return SDValue();

    // Each "clear lowest set bit" pass costs one add + one and; bail out if
    // the comparison constant would require more passes than the target's
    // cost limit allows.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    // Repeatedly clear the lowest set bit: x &= (x - 1).
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);

    // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1,
    // so check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
  }

  return SDValue();
}
4330 
4331 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4332                                    ISD::CondCode Cond, const SDLoc &dl,
4333                                    SelectionDAG &DAG) {
4334   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4335     return SDValue();
4336 
4337   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4338   if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4339     return SDValue();
4340 
4341   auto getRotateSource = [](SDValue X) {
4342     if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4343       return X.getOperand(0);
4344     return SDValue();
4345   };
4346 
4347   // Peek through a rotated value compared against 0 or -1:
4348   // (rot X, Y) == 0/-1 --> X == 0/-1
4349   // (rot X, Y) != 0/-1 --> X != 0/-1
4350   if (SDValue R = getRotateSource(N0))
4351     return DAG.getSetCC(dl, VT, R, N1, Cond);
4352 
4353   // Peek through an 'or' of a rotated value compared against 0:
4354   // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4355   // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4356   //
4357   // TODO: Add the 'and' with -1 sibling.
4358   // TODO: Recurse through a series of 'or' ops to find the rotate.
4359   EVT OpVT = N0.getValueType();
4360   if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4361     if (SDValue R = getRotateSource(N0.getOperand(0))) {
4362       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4363       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4364     }
4365     if (SDValue R = getRotateSource(N0.getOperand(1))) {
4366       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4367       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4368     }
4369   }
4370 
4371   return SDValue();
4372 }
4373 
4374 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4375                                         ISD::CondCode Cond, const SDLoc &dl,
4376                                         SelectionDAG &DAG) {
4377   // If we are testing for all-bits-clear, we might be able to do that with
4378   // less shifting since bit-order does not matter.
4379   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4380     return SDValue();
4381 
4382   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4383   if (!C1 || !C1->isZero())
4384     return SDValue();
4385 
4386   if (!N0.hasOneUse() ||
4387       (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4388     return SDValue();
4389 
4390   unsigned BitWidth = N0.getScalarValueSizeInBits();
4391   auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4392   if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4393     return SDValue();
4394 
4395   // Canonicalize fshr as fshl to reduce pattern-matching.
4396   unsigned ShAmt = ShAmtC->getZExtValue();
4397   if (N0.getOpcode() == ISD::FSHR)
4398     ShAmt = BitWidth - ShAmt;
4399 
4400   // Match an 'or' with a specific operand 'Other' in either commuted variant.
4401   SDValue X, Y;
4402   auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4403     if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4404       return false;
4405     if (Or.getOperand(0) == Other) {
4406       X = Or.getOperand(0);
4407       Y = Or.getOperand(1);
4408       return true;
4409     }
4410     if (Or.getOperand(1) == Other) {
4411       X = Or.getOperand(1);
4412       Y = Or.getOperand(0);
4413       return true;
4414     }
4415     return false;
4416   };
4417 
4418   EVT OpVT = N0.getValueType();
4419   EVT ShAmtVT = N0.getOperand(2).getValueType();
4420   SDValue F0 = N0.getOperand(0);
4421   SDValue F1 = N0.getOperand(1);
4422   if (matchOr(F0, F1)) {
4423     // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4424     SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4425     SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4426     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4427     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4428   }
4429   if (matchOr(F1, F0)) {
4430     // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4431     SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4432     SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4433     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4434     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4435   }
4436 
4437   return SDValue();
4438 }
4439 
4440 /// Try to simplify a setcc built with the specified operands and cc. If it is
4441 /// unable to simplify it, return a null SDValue.
4442 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4443                                       ISD::CondCode Cond, bool foldBooleans,
4444                                       DAGCombinerInfo &DCI,
4445                                       const SDLoc &dl) const {
4446   SelectionDAG &DAG = DCI.DAG;
4447   const DataLayout &Layout = DAG.getDataLayout();
4448   EVT OpVT = N0.getValueType();
4449   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4450 
4451   // Constant fold or commute setcc.
4452   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4453     return Fold;
4454 
4455   bool N0ConstOrSplat =
4456       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4457   bool N1ConstOrSplat =
4458       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4459 
4460   // Canonicalize toward having the constant on the RHS.
4461   // TODO: Handle non-splat vector constants. All undef causes trouble.
4462   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4463   // infinite loop here when we encounter one.
4464   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4465   if (N0ConstOrSplat && !N1ConstOrSplat &&
4466       (DCI.isBeforeLegalizeOps() ||
4467        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4468     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4469 
4470   // If we have a subtract with the same 2 non-constant operands as this setcc
4471   // -- but in reverse order -- then try to commute the operands of this setcc
4472   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4473   // instruction on some targets.
4474   if (!N0ConstOrSplat && !N1ConstOrSplat &&
4475       (DCI.isBeforeLegalizeOps() ||
4476        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4477       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4478       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4479     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4480 
4481   if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4482     return V;
4483 
4484   if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4485     return V;
4486 
4487   if (auto *N1C = isConstOrConstSplat(N1)) {
4488     const APInt &C1 = N1C->getAPIntValue();
4489 
4490     // Optimize some CTPOP cases.
4491     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4492       return V;
4493 
4494     // For equality to 0 of a no-wrap multiply, decompose and test each op:
4495     // X * Y == 0 --> (X == 0) || (Y == 0)
4496     // X * Y != 0 --> (X != 0) && (Y != 0)
4497     // TODO: This bails out if minsize is set, but if the target doesn't have a
4498     //       single instruction multiply for this type, it would likely be
4499     //       smaller to decompose.
4500     if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4501         N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4502         (N0->getFlags().hasNoUnsignedWrap() ||
4503          N0->getFlags().hasNoSignedWrap()) &&
4504         !Attr.hasFnAttr(Attribute::MinSize)) {
4505       SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4506       SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4507       unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4508       return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4509     }
4510 
4511     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4512     // equality comparison, then we're just comparing whether X itself is
4513     // zero.
4514     if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4515         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4516         llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4517       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4518         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4519             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4520           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4521             // (srl (ctlz x), 5) == 0  -> X != 0
4522             // (srl (ctlz x), 5) != 1  -> X != 0
4523             Cond = ISD::SETNE;
4524           } else {
4525             // (srl (ctlz x), 5) != 0  -> X == 0
4526             // (srl (ctlz x), 5) == 1  -> X == 0
4527             Cond = ISD::SETEQ;
4528           }
4529           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4530           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4531                               Cond);
4532         }
4533       }
4534     }
4535   }
4536 
4537   // FIXME: Support vectors.
4538   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4539     const APInt &C1 = N1C->getAPIntValue();
4540 
4541     // (zext x) == C --> x == (trunc C)
4542     // (sext x) == C --> x == (trunc C)
4543     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4544         DCI.isBeforeLegalize() && N0->hasOneUse()) {
4545       unsigned MinBits = N0.getValueSizeInBits();
4546       SDValue PreExt;
4547       bool Signed = false;
4548       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4549         // ZExt
4550         MinBits = N0->getOperand(0).getValueSizeInBits();
4551         PreExt = N0->getOperand(0);
4552       } else if (N0->getOpcode() == ISD::AND) {
4553         // DAGCombine turns costly ZExts into ANDs
4554         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4555           if ((C->getAPIntValue()+1).isPowerOf2()) {
4556             MinBits = C->getAPIntValue().countr_one();
4557             PreExt = N0->getOperand(0);
4558           }
4559       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4560         // SExt
4561         MinBits = N0->getOperand(0).getValueSizeInBits();
4562         PreExt = N0->getOperand(0);
4563         Signed = true;
4564       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4565         // ZEXTLOAD / SEXTLOAD
4566         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4567           MinBits = LN0->getMemoryVT().getSizeInBits();
4568           PreExt = N0;
4569         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4570           Signed = true;
4571           MinBits = LN0->getMemoryVT().getSizeInBits();
4572           PreExt = N0;
4573         }
4574       }
4575 
4576       // Figure out how many bits we need to preserve this constant.
4577       unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4578 
4579       // Make sure we're not losing bits from the constant.
4580       if (MinBits > 0 &&
4581           MinBits < C1.getBitWidth() &&
4582           MinBits >= ReqdBits) {
4583         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4584         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4585           // Will get folded away.
4586           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4587           if (MinBits == 1 && C1 == 1)
4588             // Invert the condition.
4589             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4590                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4591           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4592           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4593         }
4594 
4595         // If truncating the setcc operands is not desirable, we can still
4596         // simplify the expression in some cases:
4597         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4598         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4599         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4600         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4601         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4602         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4603         SDValue TopSetCC = N0->getOperand(0);
4604         unsigned N0Opc = N0->getOpcode();
4605         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4606         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4607             TopSetCC.getOpcode() == ISD::SETCC &&
4608             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4609             (isConstFalseVal(N1) ||
4610              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4611 
4612           bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4613                          (!N1C->isZero() && Cond == ISD::SETNE);
4614 
4615           if (!Inverse)
4616             return TopSetCC;
4617 
4618           ISD::CondCode InvCond = ISD::getSetCCInverse(
4619               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4620               TopSetCC.getOperand(0).getValueType());
4621           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4622                                       TopSetCC.getOperand(1),
4623                                       InvCond);
4624         }
4625       }
4626     }
4627 
4628     // If the LHS is '(and load, const)', the RHS is 0, the test is for
4629     // equality or unsigned, and all 1 bits of the const are in the same
4630     // partial word, see if we can shorten the load.
4631     if (DCI.isBeforeLegalize() &&
4632         !ISD::isSignedIntSetCC(Cond) &&
4633         N0.getOpcode() == ISD::AND && C1 == 0 &&
4634         N0.getNode()->hasOneUse() &&
4635         isa<LoadSDNode>(N0.getOperand(0)) &&
4636         N0.getOperand(0).getNode()->hasOneUse() &&
4637         isa<ConstantSDNode>(N0.getOperand(1))) {
4638       auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4639       APInt bestMask;
4640       unsigned bestWidth = 0, bestOffset = 0;
4641       if (Lod->isSimple() && Lod->isUnindexed() &&
4642           (Lod->getMemoryVT().isByteSized() ||
4643            isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4644         unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4645         unsigned origWidth = N0.getValueSizeInBits();
4646         unsigned maskWidth = origWidth;
4647         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4648         // 8 bits, but have to be careful...
4649         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4650           origWidth = Lod->getMemoryVT().getSizeInBits();
4651         const APInt &Mask = N0.getConstantOperandAPInt(1);
        // Only consider power-of-2 widths (and at least one byte) as candidates
4653         // for the narrowed load.
4654         for (unsigned width = 8; width < origWidth; width *= 2) {
4655           EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4656           if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4657             continue;
4658           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4659           // Avoid accessing any padding here for now (we could use memWidth
4660           // instead of origWidth here otherwise).
4661           unsigned maxOffset = origWidth - width;
4662           for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4663             if (Mask.isSubsetOf(newMask)) {
4664               unsigned ptrOffset =
4665                   Layout.isLittleEndian() ? offset : memWidth - width - offset;
4666               unsigned IsFast = 0;
4667               Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4668               if (allowsMemoryAccess(
4669                       *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4670                       NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4671                   IsFast) {
4672                 bestOffset = ptrOffset / 8;
4673                 bestMask = Mask.lshr(offset);
4674                 bestWidth = width;
4675                 break;
4676               }
4677             }
4678             newMask <<= 8;
4679           }
4680           if (bestWidth)
4681             break;
4682         }
4683       }
4684       if (bestWidth) {
4685         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4686         SDValue Ptr = Lod->getBasePtr();
4687         if (bestOffset != 0)
4688           Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4689         SDValue NewLoad =
4690             DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4691                         Lod->getPointerInfo().getWithOffset(bestOffset),
4692                         Lod->getOriginalAlign());
4693         SDValue And =
4694             DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4695                         DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4696         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4697       }
4698     }
4699 
4700     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4701     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4702       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4703 
4704       // If the comparison constant has bits in the upper part, the
4705       // zero-extended value could never match.
4706       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4707                                               C1.getBitWidth() - InSize))) {
4708         switch (Cond) {
4709         case ISD::SETUGT:
4710         case ISD::SETUGE:
4711         case ISD::SETEQ:
4712           return DAG.getConstant(0, dl, VT);
4713         case ISD::SETULT:
4714         case ISD::SETULE:
4715         case ISD::SETNE:
4716           return DAG.getConstant(1, dl, VT);
4717         case ISD::SETGT:
4718         case ISD::SETGE:
4719           // True if the sign bit of C1 is set.
4720           return DAG.getConstant(C1.isNegative(), dl, VT);
4721         case ISD::SETLT:
4722         case ISD::SETLE:
4723           // True if the sign bit of C1 isn't set.
4724           return DAG.getConstant(C1.isNonNegative(), dl, VT);
4725         default:
4726           break;
4727         }
4728       }
4729 
4730       // Otherwise, we can perform the comparison with the low bits.
4731       switch (Cond) {
4732       case ISD::SETEQ:
4733       case ISD::SETNE:
4734       case ISD::SETUGT:
4735       case ISD::SETUGE:
4736       case ISD::SETULT:
4737       case ISD::SETULE: {
4738         EVT newVT = N0.getOperand(0).getValueType();
4739         // FIXME: Should use isNarrowingProfitable.
4740         if (DCI.isBeforeLegalizeOps() ||
4741             (isOperationLegal(ISD::SETCC, newVT) &&
4742              isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4743              isTypeDesirableForOp(ISD::SETCC, newVT))) {
4744           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4745           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4746 
4747           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4748                                           NewConst, Cond);
4749           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4750         }
4751         break;
4752       }
4753       default:
4754         break; // todo, be more careful with signed comparisons
4755       }
4756     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4757                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4758                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4759                                       OpVT)) {
4760       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4761       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4762       EVT ExtDstTy = N0.getValueType();
4763       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4764 
4765       // If the constant doesn't fit into the number of bits for the source of
4766       // the sign extension, it is impossible for both sides to be equal.
4767       if (C1.getSignificantBits() > ExtSrcTyBits)
4768         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4769 
4770       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4771              ExtDstTy != ExtSrcTy && "Unexpected types!");
4772       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4773       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4774                                    DAG.getConstant(Imm, dl, ExtDstTy));
4775       if (!DCI.isCalledByLegalizer())
4776         DCI.AddToWorklist(ZextOp.getNode());
4777       // Otherwise, make this a use of a zext.
4778       return DAG.getSetCC(dl, VT, ZextOp,
4779                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4780     } else if ((N1C->isZero() || N1C->isOne()) &&
4781                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4782       // SETCC (X), [0|1], [EQ|NE]  -> X if X is known 0/1. i1 types are
4783       // excluded as they are handled below whilst checking for foldBooleans.
4784       if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4785           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4786           (N0.getValueType() == MVT::i1 ||
4787            getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4788           DAG.MaskedValueIsZero(
4789               N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4790         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4791         if (TrueWhenTrue)
4792           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4793         // Invert the condition.
4794         if (N0.getOpcode() == ISD::SETCC) {
4795           ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4796           CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4797           if (DCI.isBeforeLegalizeOps() ||
4798               isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4799             return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4800         }
4801       }
4802 
4803       if ((N0.getOpcode() == ISD::XOR ||
4804            (N0.getOpcode() == ISD::AND &&
4805             N0.getOperand(0).getOpcode() == ISD::XOR &&
4806             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4807           isOneConstant(N0.getOperand(1))) {
4808         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4809         // can only do this if the top bits are known zero.
4810         unsigned BitWidth = N0.getValueSizeInBits();
4811         if (DAG.MaskedValueIsZero(N0,
4812                                   APInt::getHighBitsSet(BitWidth,
4813                                                         BitWidth-1))) {
4814           // Okay, get the un-inverted input value.
4815           SDValue Val;
4816           if (N0.getOpcode() == ISD::XOR) {
4817             Val = N0.getOperand(0);
4818           } else {
4819             assert(N0.getOpcode() == ISD::AND &&
4820                     N0.getOperand(0).getOpcode() == ISD::XOR);
4821             // ((X^1)&1)^1 -> X & 1
4822             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4823                               N0.getOperand(0).getOperand(0),
4824                               N0.getOperand(1));
4825           }
4826 
4827           return DAG.getSetCC(dl, VT, Val, N1,
4828                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4829         }
4830       } else if (N1C->isOne()) {
4831         SDValue Op0 = N0;
4832         if (Op0.getOpcode() == ISD::TRUNCATE)
4833           Op0 = Op0.getOperand(0);
4834 
4835         if ((Op0.getOpcode() == ISD::XOR) &&
4836             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4837             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4838           SDValue XorLHS = Op0.getOperand(0);
4839           SDValue XorRHS = Op0.getOperand(1);
4840           // Ensure that the input setccs return an i1 type or 0/1 value.
4841           if (Op0.getValueType() == MVT::i1 ||
4842               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4843                       ZeroOrOneBooleanContent &&
4844                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4845                         ZeroOrOneBooleanContent)) {
4846             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4847             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4848             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4849           }
4850         }
4851         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4852           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4853           if (Op0.getValueType().bitsGT(VT))
4854             Op0 = DAG.getNode(ISD::AND, dl, VT,
4855                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4856                           DAG.getConstant(1, dl, VT));
4857           else if (Op0.getValueType().bitsLT(VT))
4858             Op0 = DAG.getNode(ISD::AND, dl, VT,
4859                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4860                         DAG.getConstant(1, dl, VT));
4861 
4862           return DAG.getSetCC(dl, VT, Op0,
4863                               DAG.getConstant(0, dl, Op0.getValueType()),
4864                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4865         }
4866         if (Op0.getOpcode() == ISD::AssertZext &&
4867             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4868           return DAG.getSetCC(dl, VT, Op0,
4869                               DAG.getConstant(0, dl, Op0.getValueType()),
4870                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4871       }
4872     }
4873 
4874     // Given:
4875     //   icmp eq/ne (urem %x, %y), 0
4876     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4877     //   icmp eq/ne %x, 0
4878     if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4879         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4880       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4881       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4882       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4883         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4884     }
4885 
4886     // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4887     //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4888     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4889         N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4890         N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4891         N1C->isAllOnes()) {
4892       return DAG.getSetCC(dl, VT, N0.getOperand(0),
4893                           DAG.getConstant(0, dl, OpVT),
4894                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4895     }
4896 
4897     if (SDValue V =
4898             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4899       return V;
4900   }
4901 
4902   // These simplifications apply to splat vectors as well.
4903   // TODO: Handle more splat vector cases.
4904   if (auto *N1C = isConstOrConstSplat(N1)) {
4905     const APInt &C1 = N1C->getAPIntValue();
4906 
4907     APInt MinVal, MaxVal;
4908     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4909     if (ISD::isSignedIntSetCC(Cond)) {
4910       MinVal = APInt::getSignedMinValue(OperandBitSize);
4911       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4912     } else {
4913       MinVal = APInt::getMinValue(OperandBitSize);
4914       MaxVal = APInt::getMaxValue(OperandBitSize);
4915     }
4916 
4917     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4918     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4919       // X >= MIN --> true
4920       if (C1 == MinVal)
4921         return DAG.getBoolConstant(true, dl, VT, OpVT);
4922 
4923       if (!VT.isVector()) { // TODO: Support this for vectors.
4924         // X >= C0 --> X > (C0 - 1)
4925         APInt C = C1 - 1;
4926         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4927         if ((DCI.isBeforeLegalizeOps() ||
4928              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4929             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4930                                   isLegalICmpImmediate(C.getSExtValue())))) {
4931           return DAG.getSetCC(dl, VT, N0,
4932                               DAG.getConstant(C, dl, N1.getValueType()),
4933                               NewCC);
4934         }
4935       }
4936     }
4937 
4938     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4939       // X <= MAX --> true
4940       if (C1 == MaxVal)
4941         return DAG.getBoolConstant(true, dl, VT, OpVT);
4942 
4943       // X <= C0 --> X < (C0 + 1)
4944       if (!VT.isVector()) { // TODO: Support this for vectors.
4945         APInt C = C1 + 1;
4946         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4947         if ((DCI.isBeforeLegalizeOps() ||
4948              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4949             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4950                                   isLegalICmpImmediate(C.getSExtValue())))) {
4951           return DAG.getSetCC(dl, VT, N0,
4952                               DAG.getConstant(C, dl, N1.getValueType()),
4953                               NewCC);
4954         }
4955       }
4956     }
4957 
4958     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4959       if (C1 == MinVal)
4960         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4961 
4962       // TODO: Support this for vectors after legalize ops.
4963       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4964         // Canonicalize setlt X, Max --> setne X, Max
4965         if (C1 == MaxVal)
4966           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4967 
4968         // If we have setult X, 1, turn it into seteq X, 0
4969         if (C1 == MinVal+1)
4970           return DAG.getSetCC(dl, VT, N0,
4971                               DAG.getConstant(MinVal, dl, N0.getValueType()),
4972                               ISD::SETEQ);
4973       }
4974     }
4975 
4976     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4977       if (C1 == MaxVal)
4978         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4979 
4980       // TODO: Support this for vectors after legalize ops.
4981       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4982         // Canonicalize setgt X, Min --> setne X, Min
4983         if (C1 == MinVal)
4984           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4985 
4986         // If we have setugt X, Max-1, turn it into seteq X, Max
4987         if (C1 == MaxVal-1)
4988           return DAG.getSetCC(dl, VT, N0,
4989                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
4990                               ISD::SETEQ);
4991       }
4992     }
4993 
4994     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4995       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
4996       if (C1.isZero())
4997         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4998                 VT, N0, N1, Cond, DCI, dl))
4999           return CC;
5000 
5001       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5002       // For example, when high 32-bits of i64 X are known clear:
5003       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
5004       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
5005       bool CmpZero = N1C->isZero();
5006       bool CmpNegOne = N1C->isAllOnes();
5007       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5008         // Match or(lo,shl(hi,bw/2)) pattern.
5009         auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5010           unsigned EltBits = V.getScalarValueSizeInBits();
5011           if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5012             return false;
5013           SDValue LHS = V.getOperand(0);
5014           SDValue RHS = V.getOperand(1);
5015           APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5016           // Unshifted element must have zero upperbits.
5017           if (RHS.getOpcode() == ISD::SHL &&
5018               isa<ConstantSDNode>(RHS.getOperand(1)) &&
5019               RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5020               DAG.MaskedValueIsZero(LHS, HiBits)) {
5021             Lo = LHS;
5022             Hi = RHS.getOperand(0);
5023             return true;
5024           }
5025           if (LHS.getOpcode() == ISD::SHL &&
5026               isa<ConstantSDNode>(LHS.getOperand(1)) &&
5027               LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5028               DAG.MaskedValueIsZero(RHS, HiBits)) {
5029             Lo = RHS;
5030             Hi = LHS.getOperand(0);
5031             return true;
5032           }
5033           return false;
5034         };
5035 
5036         auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5037           unsigned EltBits = N0.getScalarValueSizeInBits();
5038           unsigned HalfBits = EltBits / 2;
5039           APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5040           SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5041           SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5042           SDValue NewN0 =
5043               DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5044           SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5045           return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5046         };
5047 
5048         SDValue Lo, Hi;
5049         if (IsConcat(N0, Lo, Hi))
5050           return MergeConcat(Lo, Hi);
5051 
5052         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5053           SDValue Lo0, Lo1, Hi0, Hi1;
5054           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5055               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5056             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5057                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5058           }
5059         }
5060       }
5061     }
5062 
5063     // If we have "setcc X, C0", check to see if we can shrink the immediate
5064     // by changing cc.
5065     // TODO: Support this for vectors after legalize ops.
5066     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5067       // SETUGT X, SINTMAX  -> SETLT X, 0
5068       // SETUGE X, SINTMIN -> SETLT X, 0
5069       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5070           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5071         return DAG.getSetCC(dl, VT, N0,
5072                             DAG.getConstant(0, dl, N1.getValueType()),
5073                             ISD::SETLT);
5074 
5075       // SETULT X, SINTMIN  -> SETGT X, -1
5076       // SETULE X, SINTMAX  -> SETGT X, -1
5077       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5078           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5079         return DAG.getSetCC(dl, VT, N0,
5080                             DAG.getAllOnesConstant(dl, N1.getValueType()),
5081                             ISD::SETGT);
5082     }
5083   }
5084 
5085   // Back to non-vector simplifications.
5086   // TODO: Can we do these for vector splats?
5087   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5088     const APInt &C1 = N1C->getAPIntValue();
5089     EVT ShValTy = N0.getValueType();
5090 
5091     // Fold bit comparisons when we can. This will result in an
5092     // incorrect value when boolean false is negative one, unless
5093     // the bitsize is 1 in which case the false value is the same
5094     // in practice regardless of the representation.
5095     if ((VT.getSizeInBits() == 1 ||
5096          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5097         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5098         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5099         N0.getOpcode() == ISD::AND) {
5100       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5101         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
5102           // Perform the xform if the AND RHS is a single bit.
5103           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5104           if (AndRHS->getAPIntValue().isPowerOf2() &&
5105               !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5106             return DAG.getNode(
5107                 ISD::TRUNCATE, dl, VT,
5108                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5109                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5110           }
5111         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5112           // (X & 8) == 8  -->  (X & 8) >> 3
5113           // Perform the xform if C1 is a single bit.
5114           unsigned ShCt = C1.logBase2();
5115           if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5116             return DAG.getNode(
5117                 ISD::TRUNCATE, dl, VT,
5118                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5119                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5120           }
5121         }
5122       }
5123     }
5124 
5125     if (C1.getSignificantBits() <= 64 &&
5126         !isLegalICmpImmediate(C1.getSExtValue())) {
5127       // (X & -256) == 256 -> (X >> 8) == 1
5128       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5129           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5130         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5131           const APInt &AndRHSC = AndRHS->getAPIntValue();
5132           if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5133             unsigned ShiftBits = AndRHSC.countr_zero();
5134             if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5135               SDValue Shift = DAG.getNode(
5136                   ISD::SRL, dl, ShValTy, N0.getOperand(0),
5137                   DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5138               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5139               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5140             }
5141           }
5142         }
5143       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5144                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5145         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5146         // X <  0x100000000 -> (X >> 32) <  1
5147         // X >= 0x100000000 -> (X >> 32) >= 1
5148         // X <= 0x0ffffffff -> (X >> 32) <  1
5149         // X >  0x0ffffffff -> (X >> 32) >= 1
5150         unsigned ShiftBits;
5151         APInt NewC = C1;
5152         ISD::CondCode NewCond = Cond;
5153         if (AdjOne) {
5154           ShiftBits = C1.countr_one();
5155           NewC = NewC + 1;
5156           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5157         } else {
5158           ShiftBits = C1.countr_zero();
5159         }
5160         NewC.lshrInPlace(ShiftBits);
5161         if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5162             isLegalICmpImmediate(NewC.getSExtValue()) &&
5163             !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5164           SDValue Shift =
5165               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5166                           DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5167           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5168           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5169         }
5170       }
5171     }
5172   }
5173 
5174   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5175     auto *CFP = cast<ConstantFPSDNode>(N1);
5176     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5177 
5178     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
5179     // constant if knowing that the operand is non-nan is enough.  We prefer to
5180     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5181     // materialize 0.0.
5182     if (Cond == ISD::SETO || Cond == ISD::SETUO)
5183       return DAG.getSetCC(dl, VT, N0, N0, Cond);
5184 
5185     // setcc (fneg x), C -> setcc swap(pred) x, -C
5186     if (N0.getOpcode() == ISD::FNEG) {
5187       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5188       if (DCI.isBeforeLegalizeOps() ||
5189           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5190         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5191         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5192       }
5193     }
5194 
5195     // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5196     if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5197         !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5198       bool IsFabs = N0.getOpcode() == ISD::FABS;
5199       SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5200       if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5201         FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5202                                              : (IsFabs ? fcInf : fcPosInf);
5203         if (Cond == ISD::SETUEQ)
5204           Flag |= fcNan;
5205         return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5206                            DAG.getTargetConstant(Flag, dl, MVT::i32));
5207       }
5208     }
5209 
5210     // If the condition is not legal, see if we can find an equivalent one
5211     // which is legal.
5212     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5213       // If the comparison was an awkward floating-point == or != and one of
5214       // the comparison operands is infinity or negative infinity, convert the
5215       // condition to a less-awkward <= or >=.
5216       if (CFP->getValueAPF().isInfinity()) {
5217         bool IsNegInf = CFP->getValueAPF().isNegative();
5218         ISD::CondCode NewCond = ISD::SETCC_INVALID;
5219         switch (Cond) {
5220         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5221         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5222         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5223         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5224         default: break;
5225         }
5226         if (NewCond != ISD::SETCC_INVALID &&
5227             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5228           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5229       }
5230     }
5231   }
5232 
5233   if (N0 == N1) {
5234     // The sext(setcc()) => setcc() optimization relies on the appropriate
5235     // constant being emitted.
5236     assert(!N0.getValueType().isInteger() &&
5237            "Integer types should be handled by FoldSetCC");
5238 
5239     bool EqTrue = ISD::isTrueWhenEqual(Cond);
5240     unsigned UOF = ISD::getUnorderedFlavor(Cond);
5241     if (UOF == 2) // FP operators that are undefined on NaNs.
5242       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5243     if (UOF == unsigned(EqTrue))
5244       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5245     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
5246     // if it is not already.
5247     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5248     if (NewCond != Cond &&
5249         (DCI.isBeforeLegalizeOps() ||
5250                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5251       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5252   }
5253 
5254   // ~X > ~Y --> Y > X
5255   // ~X < ~Y --> Y < X
5256   // ~X < C --> X > ~C
5257   // ~X > C --> X < ~C
5258   if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5259       N0.getValueType().isInteger()) {
5260     if (isBitwiseNot(N0)) {
5261       if (isBitwiseNot(N1))
5262         return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5263 
5264       if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5265           !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5266         SDValue Not = DAG.getNOT(dl, N1, OpVT);
5267         return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5268       }
5269     }
5270   }
5271 
5272   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5273       N0.getValueType().isInteger()) {
5274     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5275         N0.getOpcode() == ISD::XOR) {
5276       // Simplify (X+Y) == (X+Z) -->  Y == Z
5277       if (N0.getOpcode() == N1.getOpcode()) {
5278         if (N0.getOperand(0) == N1.getOperand(0))
5279           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5280         if (N0.getOperand(1) == N1.getOperand(1))
5281           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5282         if (isCommutativeBinOp(N0.getOpcode())) {
5283           // If X op Y == Y op X, try other combinations.
5284           if (N0.getOperand(0) == N1.getOperand(1))
5285             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5286                                 Cond);
5287           if (N0.getOperand(1) == N1.getOperand(0))
5288             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5289                                 Cond);
5290         }
5291       }
5292 
5293       // If RHS is a legal immediate value for a compare instruction, we need
5294       // to be careful about increasing register pressure needlessly.
5295       bool LegalRHSImm = false;
5296 
5297       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5298         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5299           // Turn (X+C1) == C2 --> X == C2-C1
5300           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5301             return DAG.getSetCC(
5302                 dl, VT, N0.getOperand(0),
5303                 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5304                                 dl, N0.getValueType()),
5305                 Cond);
5306 
5307           // Turn (X^C1) == C2 --> X == C1^C2
5308           if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5309             return DAG.getSetCC(
5310                 dl, VT, N0.getOperand(0),
5311                 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5312                                 dl, N0.getValueType()),
5313                 Cond);
5314         }
5315 
5316         // Turn (C1-X) == C2 --> X == C1-C2
5317         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5318           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5319             return DAG.getSetCC(
5320                 dl, VT, N0.getOperand(1),
5321                 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5322                                 dl, N0.getValueType()),
5323                 Cond);
5324 
5325         // Could RHSC fold directly into a compare?
5326         if (RHSC->getValueType(0).getSizeInBits() <= 64)
5327           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5328       }
5329 
5330       // (X+Y) == X --> Y == 0 and similar folds.
5331       // Don't do this if X is an immediate that can fold into a cmp
5332       // instruction and X+Y has other uses. It could be an induction variable
5333       // chain, and the transform would increase register pressure.
5334       if (!LegalRHSImm || N0.hasOneUse())
5335         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5336           return V;
5337     }
5338 
5339     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5340         N1.getOpcode() == ISD::XOR)
5341       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5342         return V;
5343 
5344     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5345       return V;
5346   }
5347 
5348   // Fold remainder of division by a constant.
5349   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5350       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5351     // When division is cheap or optimizing for minimum size,
5352     // fall through to DIVREM creation by skipping this fold.
5353     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5354       if (N0.getOpcode() == ISD::UREM) {
5355         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5356           return Folded;
5357       } else if (N0.getOpcode() == ISD::SREM) {
5358         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5359           return Folded;
5360       }
5361     }
5362   }
5363 
5364   // Fold away ALL boolean setcc's.
5365   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5366     SDValue Temp;
5367     switch (Cond) {
5368     default: llvm_unreachable("Unknown integer setcc!");
5369     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
5370       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5371       N0 = DAG.getNOT(dl, Temp, OpVT);
5372       if (!DCI.isCalledByLegalizer())
5373         DCI.AddToWorklist(Temp.getNode());
5374       break;
5375     case ISD::SETNE:  // X != Y   -->  (X^Y)
5376       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5377       break;
5378     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
5379     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
5380       Temp = DAG.getNOT(dl, N0, OpVT);
5381       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5382       if (!DCI.isCalledByLegalizer())
5383         DCI.AddToWorklist(Temp.getNode());
5384       break;
5385     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
5386     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
5387       Temp = DAG.getNOT(dl, N1, OpVT);
5388       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5389       if (!DCI.isCalledByLegalizer())
5390         DCI.AddToWorklist(Temp.getNode());
5391       break;
5392     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
5393     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
5394       Temp = DAG.getNOT(dl, N0, OpVT);
5395       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5396       if (!DCI.isCalledByLegalizer())
5397         DCI.AddToWorklist(Temp.getNode());
5398       break;
5399     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
5400     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
5401       Temp = DAG.getNOT(dl, N1, OpVT);
5402       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5403       break;
5404     }
5405     if (VT.getScalarType() != MVT::i1) {
5406       if (!DCI.isCalledByLegalizer())
5407         DCI.AddToWorklist(N0.getNode());
5408       // FIXME: If running after legalize, we probably can't do this.
5409       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5410       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5411     }
5412     return N0;
5413   }
5414 
5415   // Could not fold it.
5416   return SDValue();
5417 }
5418 
5419 /// Returns true (and the GlobalValue and the offset) if the node is a
5420 /// GlobalAddress + offset.
5421 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5422                                     int64_t &Offset) const {
5423 
5424   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5425 
5426   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5427     GA = GASD->getGlobal();
5428     Offset += GASD->getOffset();
5429     return true;
5430   }
5431 
5432   if (N->getOpcode() == ISD::ADD) {
5433     SDValue N1 = N->getOperand(0);
5434     SDValue N2 = N->getOperand(1);
5435     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5436       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5437         Offset += V->getSExtValue();
5438         return true;
5439       }
5440     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5441       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5442         Offset += V->getSExtValue();
5443         return true;
5444       }
5445     }
5446   }
5447 
5448   return false;
5449 }
5450 
5451 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5452                                           DAGCombinerInfo &DCI) const {
5453   // Default implementation: no optimization.
5454   return SDValue();
5455 }
5456 
5457 //===----------------------------------------------------------------------===//
5458 //  Inline Assembler Implementation Methods
5459 //===----------------------------------------------------------------------===//
5460 
5461 TargetLowering::ConstraintType
5462 TargetLowering::getConstraintType(StringRef Constraint) const {
5463   unsigned S = Constraint.size();
5464 
5465   if (S == 1) {
5466     switch (Constraint[0]) {
5467     default: break;
5468     case 'r':
5469       return C_RegisterClass;
5470     case 'm': // memory
5471     case 'o': // offsetable
5472     case 'V': // not offsetable
5473       return C_Memory;
5474     case 'p': // Address.
5475       return C_Address;
5476     case 'n': // Simple Integer
5477     case 'E': // Floating Point Constant
5478     case 'F': // Floating Point Constant
5479       return C_Immediate;
5480     case 'i': // Simple Integer or Relocatable Constant
5481     case 's': // Relocatable Constant
5482     case 'X': // Allow ANY value.
5483     case 'I': // Target registers.
5484     case 'J':
5485     case 'K':
5486     case 'L':
5487     case 'M':
5488     case 'N':
5489     case 'O':
5490     case 'P':
5491     case '<':
5492     case '>':
5493       return C_Other;
5494     }
5495   }
5496 
5497   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5498     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5499       return C_Memory;
5500     return C_Register;
5501   }
5502   return C_Unknown;
5503 }
5504 
5505 /// Try to replace an X constraint, which matches anything, with another that
5506 /// has more specific requirements based on the type of the corresponding
5507 /// operand.
5508 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5509   if (ConstraintVT.isInteger())
5510     return "r";
5511   if (ConstraintVT.isFloatingPoint())
5512     return "f"; // works for many targets
5513   return nullptr;
5514 }
5515 
5516 SDValue TargetLowering::LowerAsmOutputForConstraint(
5517     SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5518     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5519   return SDValue();
5520 }
5521 
5522 /// Lower the specified operand into the Ops vector.
5523 /// If it is invalid, don't add anything to Ops.
5524 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5525                                                   StringRef Constraint,
5526                                                   std::vector<SDValue> &Ops,
5527                                                   SelectionDAG &DAG) const {
5528 
5529   if (Constraint.size() > 1)
5530     return;
5531 
5532   char ConstraintLetter = Constraint[0];
5533   switch (ConstraintLetter) {
5534   default: break;
5535   case 'X':    // Allows any operand
5536   case 'i':    // Simple Integer or Relocatable Constant
5537   case 'n':    // Simple Integer
5538   case 's': {  // Relocatable Constant
5539 
5540     ConstantSDNode *C;
5541     uint64_t Offset = 0;
5542 
5543     // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5544     // etc., since getelementpointer is variadic. We can't use
5545     // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5546     // while in this case the GA may be furthest from the root node which is
5547     // likely an ISD::ADD.
5548     while (true) {
5549       if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5550         // gcc prints these as sign extended.  Sign extend value to 64 bits
5551         // now; without this it would get ZExt'd later in
5552         // ScheduleDAGSDNodes::EmitNode, which is very generic.
5553         bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5554         BooleanContent BCont = getBooleanContents(MVT::i64);
5555         ISD::NodeType ExtOpc =
5556             IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5557         int64_t ExtVal =
5558             ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5559         Ops.push_back(
5560             DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5561         return;
5562       }
5563       if (ConstraintLetter != 'n') {
5564         if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5565           Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5566                                                    GA->getValueType(0),
5567                                                    Offset + GA->getOffset()));
5568           return;
5569         }
5570         if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5571           Ops.push_back(DAG.getTargetBlockAddress(
5572               BA->getBlockAddress(), BA->getValueType(0),
5573               Offset + BA->getOffset(), BA->getTargetFlags()));
5574           return;
5575         }
5576         if (isa<BasicBlockSDNode>(Op)) {
5577           Ops.push_back(Op);
5578           return;
5579         }
5580       }
5581       const unsigned OpCode = Op.getOpcode();
5582       if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5583         if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5584           Op = Op.getOperand(1);
5585         // Subtraction is not commutative.
5586         else if (OpCode == ISD::ADD &&
5587                  (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5588           Op = Op.getOperand(0);
5589         else
5590           return;
5591         Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5592         continue;
5593       }
5594       return;
5595     }
5596     break;
5597   }
5598   }
5599 }
5600 
// Default implementation: append no extra operands. Targets override this
// hook to add target-specific operands for their intrinsic calls.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5604 
5605 std::pair<unsigned, const TargetRegisterClass *>
5606 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5607                                              StringRef Constraint,
5608                                              MVT VT) const {
5609   if (!Constraint.starts_with("{"))
5610     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5611   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5612 
5613   // Remove the braces from around the name.
5614   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5615 
5616   std::pair<unsigned, const TargetRegisterClass *> R =
5617       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5618 
5619   // Figure out which register class contains this reg.
5620   for (const TargetRegisterClass *RC : RI->regclasses()) {
5621     // If none of the value types for this register class are valid, we
5622     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5623     if (!isLegalRC(*RI, *RC))
5624       continue;
5625 
5626     for (const MCPhysReg &PR : *RC) {
5627       if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5628         std::pair<unsigned, const TargetRegisterClass *> S =
5629             std::make_pair(PR, RC);
5630 
5631         // If this register class has the requested value type, return it,
5632         // otherwise keep searching and return the first class found
5633         // if no other is found which explicitly has the requested type.
5634         if (RI->isTypeLegalForClass(*RC, VT))
5635           return S;
5636         if (!R.second)
5637           R = S;
5638       }
5639     }
5640   }
5641 
5642   return R;
5643 }
5644 
5645 //===----------------------------------------------------------------------===//
5646 // Constraint Selection.
5647 
5648 /// Return true of this is an input operand that is a matching constraint like
5649 /// "4".
5650 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5651   assert(!ConstraintCode.empty() && "No known constraint!");
5652   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5653 }
5654 
5655 /// If this is an input matching constraint, this method returns the output
5656 /// operand it matches.
5657 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5658   assert(!ConstraintCode.empty() && "No known constraint!");
5659   return atoi(ConstraintCode.c_str());
5660 }
5661 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple outputs are returned as a struct; pick this output's slot.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      // Labels only occur on callbr; they name an indirect destination block.
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      // Labels consume no call argument, so skip the operand-type code below.
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // For indirect operands the pointee type comes from the mandatory
        // elementtype attribute, not from the (opaque) pointer type.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          // Treat the aggregate as an integer of matching width.
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One operand cannot be satisfied at all; this alternative loses.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Even with differing VTs the pair is still fine if both land in the
        // same register class and agree on int-or-FP-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5851 
5852 /// Return a number indicating our preference for chosing a type of constraint
5853 /// over another, for the purpose of sorting them. Immediates are almost always
5854 /// preferrable (when they can be emitted). A higher return value means a
5855 /// stronger preference for one constraint type relative to another.
5856 /// FIXME: We should prefer registers over memory but doing so may lead to
5857 /// unrecoverable register exhaustion later.
5858 /// https://github.com/llvm/llvm-project/issues/20571
5859 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5860   switch (CT) {
5861   case TargetLowering::C_Immediate:
5862   case TargetLowering::C_Other:
5863     return 4;
5864   case TargetLowering::C_Memory:
5865   case TargetLowering::C_Address:
5866     return 3;
5867   case TargetLowering::C_RegisterClass:
5868     return 2;
5869   case TargetLowering::C_Register:
5870     return 1;
5871   case TargetLowering::C_Unknown:
5872     return 0;
5873   }
5874   llvm_unreachable("Invalid constraint type");
5875 }
5876 
5877 /// Examine constraint type and operand type and determine a weight value.
5878 /// This object must already have been set up with the operand type
5879 /// and the current alternative constraint selected.
5880 TargetLowering::ConstraintWeight
5881   TargetLowering::getMultipleConstraintMatchWeight(
5882     AsmOperandInfo &info, int maIndex) const {
5883   InlineAsm::ConstraintCodeVector *rCodes;
5884   if (maIndex >= (int)info.multipleAlternatives.size())
5885     rCodes = &info.Codes;
5886   else
5887     rCodes = &info.multipleAlternatives[maIndex].Codes;
5888   ConstraintWeight BestWeight = CW_Invalid;
5889 
5890   // Loop over the options, keeping track of the most general one.
5891   for (const std::string &rCode : *rCodes) {
5892     ConstraintWeight weight =
5893         getSingleConstraintMatchWeight(info, rCode.c_str());
5894     if (weight > BestWeight)
5895       BestWeight = weight;
5896   }
5897 
5898   return BestWeight;
5899 }
5900 
5901 /// Examine constraint type and operand type and determine a weight value.
5902 /// This object must already have been set up with the operand type
5903 /// and the current alternative constraint selected.
5904 TargetLowering::ConstraintWeight
5905   TargetLowering::getSingleConstraintMatchWeight(
5906     AsmOperandInfo &info, const char *constraint) const {
5907   ConstraintWeight weight = CW_Invalid;
5908   Value *CallOperandVal = info.CallOperandVal;
5909     // If we don't have a value, we can't do a match,
5910     // but allow it at the lowest weight.
5911   if (!CallOperandVal)
5912     return CW_Default;
5913   // Look at the constraint type.
5914   switch (*constraint) {
5915     case 'i': // immediate integer.
5916     case 'n': // immediate integer with a known value.
5917       if (isa<ConstantInt>(CallOperandVal))
5918         weight = CW_Constant;
5919       break;
5920     case 's': // non-explicit intregal immediate.
5921       if (isa<GlobalValue>(CallOperandVal))
5922         weight = CW_Constant;
5923       break;
5924     case 'E': // immediate float if host format.
5925     case 'F': // immediate float.
5926       if (isa<ConstantFP>(CallOperandVal))
5927         weight = CW_Constant;
5928       break;
5929     case '<': // memory operand with autodecrement.
5930     case '>': // memory operand with autoincrement.
5931     case 'm': // memory operand.
5932     case 'o': // offsettable memory operand
5933     case 'V': // non-offsettable memory operand
5934       weight = CW_Memory;
5935       break;
5936     case 'r': // general register.
5937     case 'g': // general register, memory operand or immediate integer.
5938               // note: Clang converts "g" to "imr".
5939       if (CallOperandVal->getType()->isIntegerTy())
5940         weight = CW_Register;
5941       break;
5942     case 'X': // any operand.
5943   default:
5944     weight = CW_Default;
5945     break;
5946   }
5947   return weight;
5948 }
5949 
5950 /// If there are multiple different constraints that we could pick for this
5951 /// operand (e.g. "imr") try to pick the 'best' one.
5952 /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5953 /// into seven classes:
5954 ///    Register      -> one specific register
5955 ///    RegisterClass -> a group of regs
5956 ///    Memory        -> memory
5957 ///    Address       -> a symbolic memory reference
5958 ///    Immediate     -> immediate values
5959 ///    Other         -> magic values (such as "Flag Output Operands")
5960 ///    Unknown       -> something we don't recognize yet and can't handle
5961 /// Ideally, we would pick the most specific constraint possible: if we have
5962 /// something that fits into a register, we would pick it.  The problem here
5963 /// is that if we have something that could either be in a register or in
5964 /// memory that use of the register could cause selection of *other*
5965 /// operands to fail: they might only succeed if we pick memory.  Because of
5966 /// this the heuristic we use is:
5967 ///
5968 ///  1) If there is an 'other' constraint, and if the operand is valid for
5969 ///     that constraint, use it.  This makes us take advantage of 'i'
5970 ///     constraints when available.
5971 ///  2) Otherwise, pick the most general constraint present.  This prefers
5972 ///     'm' over 'r', for example.
5973 ///
5974 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5975     TargetLowering::AsmOperandInfo &OpInfo) const {
5976   ConstraintGroup Ret;
5977 
5978   Ret.reserve(OpInfo.Codes.size());
5979   for (StringRef Code : OpInfo.Codes) {
5980     TargetLowering::ConstraintType CType = getConstraintType(Code);
5981 
5982     // Indirect 'other' or 'immediate' constraints are not allowed.
5983     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5984                                CType == TargetLowering::C_Register ||
5985                                CType == TargetLowering::C_RegisterClass))
5986       continue;
5987 
5988     // Things with matching constraints can only be registers, per gcc
5989     // documentation.  This mainly affects "g" constraints.
5990     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5991       continue;
5992 
5993     Ret.emplace_back(Code, CType);
5994   }
5995 
5996   std::stable_sort(
5997       Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
5998         return getConstraintPiority(a.second) > getConstraintPiority(b.second);
5999       });
6000 
6001   return Ret;
6002 }
6003 
6004 /// If we have an immediate, see if we can lower it. Return true if we can,
6005 /// false otherwise.
6006 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6007                                      SDValue Op, SelectionDAG *DAG,
6008                                      const TargetLowering &TLI) {
6009 
6010   assert((P.second == TargetLowering::C_Other ||
6011           P.second == TargetLowering::C_Immediate) &&
6012          "need immediate or other");
6013 
6014   if (!Op.getNode())
6015     return false;
6016 
6017   std::vector<SDValue> ResultOps;
6018   TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6019   return !ResultOps.empty();
6020 }
6021 
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    // Multiple codes: sort them by preference (immediates first).
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    // The group is sorted, so immediate/other constraints lead it. Walk past
    // those we cannot actually lower; stop at the first lowerable one or the
    // first non-immediate constraint.
    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Basic blocks and block addresses become immediates.
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6079 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // For each divisor element: strip trailing zero bits (handled with an
  // exact arithmetic shift) and record the odd factor's multiplicative
  // inverse. Returns false on a zero divisor, which has no inverse.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Materialize the shift amounts and factors in the shape of Op1
  // (build vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  // Shift out the trailing zeros (exact, so no bits are lost), then multiply
  // by the inverse of the odd factor.
  SDValue Res = Op0;
  if (UseSRA) {
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6139 
/// Given an exact UDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRL = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // For each divisor element: strip trailing zero bits (handled with an
  // exact logical shift) and record the odd factor's multiplicative inverse.
  // Returns false on a zero divisor, which has no inverse.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.lshrInPlace(Shift);
      UseSRL = true;
    }
    // Calculate the multiplicative inverse modulo BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  SDValue Op1 = N->getOperand(1);

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
    return SDValue();

  // Materialize the shift amounts and factors in the shape of Op1
  // (build vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  // Shift out the trailing zeros (exact, so no bits are lost), then multiply
  // by the inverse of the odd factor.
  SDValue Res = N->getOperand(0);
  if (UseSRL) {
    Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6200 
6201 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6202                               SelectionDAG &DAG,
6203                               SmallVectorImpl<SDNode *> &Created) const {
6204   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6205   if (isIntDivCheap(N->getValueType(0), Attr))
6206     return SDValue(N, 0); // Lower SDIV as SDIV
6207   return SDValue();
6208 }
6209 
6210 SDValue
6211 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6212                               SelectionDAG &DAG,
6213                               SmallVectorImpl<SDNode *> &Created) const {
6214   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6215   if (isIntDivCheap(N->getValueType(0), Attr))
6216     return SDValue(N, 0); // Lower SREM as SREM
6217   return SDValue();
6218 }
6219 
6220 /// Build sdiv by power-of-2 with conditional move instructions
6221 /// Ref: "Hacker's Delight" by Henry Warren 10-1
6222 /// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6223 ///   bgez x, label
6224 ///   add x, x, 2**k-1
6225 /// label:
6226 ///   sra res, x, k
6227 ///   neg res, res (when the divisor is negative)
6228 SDValue TargetLowering::buildSDIVPow2WithCMov(
6229     SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6230     SmallVectorImpl<SDNode *> &Created) const {
6231   unsigned Lg2 = Divisor.countr_zero();
6232   EVT VT = N->getValueType(0);
6233 
6234   SDLoc DL(N);
6235   SDValue N0 = N->getOperand(0);
6236   SDValue Zero = DAG.getConstant(0, DL, VT);
6237   APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6238   SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6239 
6240   // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6241   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6242   SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6243   SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6244   SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6245 
6246   Created.push_back(Cmp.getNode());
6247   Created.push_back(Add.getNode());
6248   Created.push_back(CMov.getNode());
6249 
6250   // Divide by pow2.
6251   SDValue SRA =
6252       DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6253 
6254   // If we're dividing by a positive value, we're done.  Otherwise, we must
6255   // negate the result.
6256   if (Divisor.isNonNegative())
6257     return SRA;
6258 
6259   Created.push_back(SRA.getNode());
6260   return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6261 }
6262 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // MulVT must hold the full double-width product of two EltBits values.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Per-element: compute the signed magic number, the optional +/-1 numerator
  // factor, the post-multiply shift amount, and a mask (0 or -1) controlling
  // the final sign-bit correction. Returns false on a zero divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Materialize the collected constants in the shape of N1 (build vector,
  // splat, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of SMUL_LOHI is the high half of the product.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
         isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6430 
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Populates \p Created with every node built so the caller can add them to
/// the combiner worklist. Returns an empty SDValue if no profitable expansion
/// could be constructed.
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type to multiply in when VT itself is illegal but promotes; only
  // set (and only used) on the !isTypeLegal(VT) paths below.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be at least double-width so the full 2*EltBits
    // product fits, and must have a legal MUL.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  // Per-element magic data; the Use* flags record whether *any* lane needs
  // the corresponding step, so lanes that don't are padded with no-op values.
  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Compute the magic constants for a single (lane) divisor. Returns false to
  // abort the whole transform (division by zero is left for constant folding).
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      // Bogus lane values; the final select returns N0 for these lanes.
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(
              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // For NPQ lanes the factor 2^(EltBits-1) makes a MULHU act as a
      // logical shift-right-by-one; non-NPQ lanes multiply by zero.
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  // Assemble the per-lane constants into operands of the matching shape.
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    // Scalar case. NPQFactor is intentionally not set here: the scalar NPQ
    // path below uses an explicit SRL-by-1 instead of a MULHU.
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // Produce the high half of (X * Y), choosing the cheapest legal form, or
  // an empty SDValue if none exists.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of UMUL_LOHI is the high half.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
         isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // NPQ fixup: Q += (N0 - Q) >> 1 for lanes whose magic constant required the
  // "add" variant of the algorithm (Hacker's Delight 10-8).
  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // The magic expansion is invalid for divisor lanes equal to 1; select the
  // original numerator for those lanes.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6630 
6631 /// If all values in Values that *don't* match the predicate are same 'splat'
6632 /// value, then replace all values with that splat value.
6633 /// Else, if AlternativeReplacement was provided, then replace all values that
6634 /// do match predicate with AlternativeReplacement value.
6635 static void
6636 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6637                           std::function<bool(SDValue)> Predicate,
6638                           SDValue AlternativeReplacement = SDValue()) {
6639   SDValue Replacement;
6640   // Is there a value for which the Predicate does *NOT* match? What is it?
6641   auto SplatValue = llvm::find_if_not(Values, Predicate);
6642   if (SplatValue != Values.end()) {
6643     // Does Values consist only of SplatValue's and values matching Predicate?
6644     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6645           return Value == *SplatValue || Predicate(Value);
6646         })) // Then we shall replace values matching predicate with SplatValue.
6647       Replacement = *SplatValue;
6648   }
6649   if (!Replacement) {
6650     // Oops, we did not find the "baseline" splat value.
6651     if (!AlternativeReplacement)
6652       return; // Nothing to do.
6653     // Let's replace with provided value then.
6654     Replacement = AlternativeReplacement;
6655   }
6656   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6657 }
6658 
6659 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6660 /// where the divisor is constant and the comparison target is zero,
6661 /// return a DAG expression that will generate the same comparison result
6662 /// using only multiplications, additions and shifts/rotations.
6663 /// Ref: "Hacker's Delight" 10-17.
6664 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6665                                         SDValue CompTargetNode,
6666                                         ISD::CondCode Cond,
6667                                         DAGCombinerInfo &DCI,
6668                                         const SDLoc &DL) const {
6669   SmallVector<SDNode *, 5> Built;
6670   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6671                                          DCI, DL, Built)) {
6672     for (SDNode *N : Built)
6673       DCI.AddToWorklist(N);
6674     return Folded;
6675   }
6676 
6677   return SDValue();
6678 }
6679 
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  // Returns an empty SDValue when the fold is unprofitable or illegal;
  // appends every node built to Created so the caller can queue them.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Per-lane analysis flags, accumulated by BuildUREMPattern below.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Compute P, K and Q for one lane given its divisor and comparison
  // constant. Returns false to abort the whole fold.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // D0 is odd, so the inverse modulo 2^W always exists.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnes(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to a bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
                              /*implicitTrunc=*/true),
                        DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Assemble the per-lane constants into operands of matching shape.
  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // When comparing with a non-zero constant, rewrite `x u% D == C` as
  // `(x - C) u% D == 0` by subtracting C from the LHS first.
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops - legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above VSELECT above.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
6900 
6901 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6902 /// where the divisor is constant and the comparison target is zero,
6903 /// return a DAG expression that will generate the same comparison result
6904 /// using only multiplications, additions and shifts/rotations.
6905 /// Ref: "Hacker's Delight" 10-17.
6906 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6907                                         SDValue CompTargetNode,
6908                                         ISD::CondCode Cond,
6909                                         DAGCombinerInfo &DCI,
6910                                         const SDLoc &DL) const {
6911   SmallVector<SDNode *, 7> Built;
6912   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6913                                          DCI, DL, Built)) {
6914     assert(Built.size() <= 7 && "Max size prediction failed.");
6915     for (SDNode *N : Built)
6916       DCI.AddToWorklist(N);
6917     return Folded;
6918   }
6919 
6920   return SDValue();
6921 }
6922 
6923 SDValue
6924 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6925                                   SDValue CompTargetNode, ISD::CondCode Cond,
6926                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6927                                   SmallVectorImpl<SDNode *> &Created) const {
6928   // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6929   // Fold:
6930   //   (seteq/ne (srem N, D), 0)
6931   // To:
6932   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
6933   //
6934   // - D must be constant, with D = D0 * 2^K where D0 is odd
6935   // - P is the multiplicative inverse of D0 modulo 2^W
6936   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6937   // - Q = floor((2 * A) / (2^K))
6938   // where W is the width of the common type of N and D.
6939   //
6940   // When D is a power of two (and thus D0 is 1), the normal
6941   // formula for A and Q don't apply, because the derivation
6942   // depends on D not dividing 2^(W-1), and thus theorem ZRS
6943   // does not apply. This specifically fails when N = INT_MIN.
6944   //
6945   // Instead, for power-of-two D, we use:
6946   // - A = 2^(W-1)
6947   // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6948   // - Q = 2^(W-K) - 1
6949   // |-> Test that the top K bits are zero after rotation
6950   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6951          "Only applicable for (in)equality comparisons.");
6952 
6953   SelectionDAG &DAG = DCI.DAG;
6954 
6955   EVT VT = REMNode.getValueType();
6956   EVT SVT = VT.getScalarType();
6957   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6958   EVT ShSVT = ShVT.getScalarType();
6959 
6960   // If we are after ops legalization, and MUL is unavailable, we can not
6961   // proceed.
6962   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6963     return SDValue();
6964 
6965   // TODO: Could support comparing with non-zero too.
6966   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6967   if (!CompTarget || !CompTarget->isZero())
6968     return SDValue();
6969 
6970   bool HadIntMinDivisor = false;
6971   bool HadOneDivisor = false;
6972   bool AllDivisorsAreOnes = true;
6973   bool HadEvenDivisor = false;
6974   bool NeedToApplyOffset = false;
6975   bool AllDivisorsArePowerOfTwo = true;
6976   SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6977 
6978   auto BuildSREMPattern = [&](ConstantSDNode *C) {
6979     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6980     if (C->isZero())
6981       return false;
6982 
6983     // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6984 
6985     // WARNING: this fold is only valid for positive divisors!
6986     APInt D = C->getAPIntValue();
6987     if (D.isNegative())
6988       D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`
6989 
6990     HadIntMinDivisor |= D.isMinSignedValue();
6991 
6992     // If all divisors are ones, we will prefer to avoid the fold.
6993     HadOneDivisor |= D.isOne();
6994     AllDivisorsAreOnes &= D.isOne();
6995 
6996     // Decompose D into D0 * 2^K
6997     unsigned K = D.countr_zero();
6998     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6999     APInt D0 = D.lshr(K);
7000 
7001     if (!D.isMinSignedValue()) {
7002       // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7003       // we don't care about this lane in this fold, we'll special-handle it.
7004       HadEvenDivisor |= (K != 0);
7005     }
7006 
7007     // D is a power-of-two if D0 is one. This includes INT_MIN.
7008     // If all divisors are power-of-two, we will prefer to avoid the fold.
7009     AllDivisorsArePowerOfTwo &= D0.isOne();
7010 
7011     // P = inv(D0, 2^W)
7012     // 2^W requires W + 1 bits, so we have to extend and then truncate.
7013     unsigned W = D.getBitWidth();
7014     APInt P = D0.multiplicativeInverse();
7015     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7016 
7017     // A = floor((2^(W - 1) - 1) / D0) & -2^K
7018     APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7019     A.clearLowBits(K);
7020 
7021     if (!D.isMinSignedValue()) {
7022       // If divisor INT_MIN, then we don't care about this lane in this fold,
7023       // we'll special-handle it.
7024       NeedToApplyOffset |= A != 0;
7025     }
7026 
7027     // Q = floor((2 * A) / (2^K))
7028     APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7029 
7030     assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7031            "We are expecting that A is always less than all-ones for SVT");
7032     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7033            "We are expecting that K is always less than all-ones for ShSVT");
7034 
7035     // If D was a power of two, apply the alternate constant derivation.
7036     if (D0.isOne()) {
7037       // A = 2^(W-1)
7038       A = APInt::getSignedMinValue(W);
7039       // - Q = 2^(W-K) - 1
7040       Q = APInt::getAllOnes(W - K).zext(W);
7041     }
7042 
7043     // If the divisor is 1 the result can be constant-folded. Likewise, we
7044     // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7045     if (D.isOne()) {
7046       // Set P, A and K to a bogus values so we can try to splat them.
7047       P = 0;
7048       A = -1;
7049       K = -1;
7050 
7051       // x ?% 1 == 0  <-->  true  <-->  x u<= -1
7052       Q = -1;
7053     }
7054 
7055     PAmts.push_back(DAG.getConstant(P, DL, SVT));
7056     AAmts.push_back(DAG.getConstant(A, DL, SVT));
7057     KAmts.push_back(
7058         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7059                               /*implicitTrunc=*/true),
7060                         DL, ShSVT));
7061     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7062     return true;
7063   };
7064 
7065   SDValue N = REMNode.getOperand(0);
7066   SDValue D = REMNode.getOperand(1);
7067 
7068   // Collect the values from each element.
7069   if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7070     return SDValue();
7071 
7072   // If this is a srem by a one, avoid the fold since it can be constant-folded.
7073   if (AllDivisorsAreOnes)
7074     return SDValue();
7075 
7076   // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7077   // since it can be best implemented as a bit test.
7078   if (AllDivisorsArePowerOfTwo)
7079     return SDValue();
7080 
7081   SDValue PVal, AVal, KVal, QVal;
7082   if (D.getOpcode() == ISD::BUILD_VECTOR) {
7083     if (HadOneDivisor) {
7084       // Try to turn PAmts into a splat, since we don't care about the values
7085       // that are currently '0'. If we can't, just keep '0'`s.
7086       turnVectorIntoSplatVector(PAmts, isNullConstant);
7087       // Try to turn AAmts into a splat, since we don't care about the
7088       // values that are currently '-1'. If we can't, change them to '0'`s.
7089       turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7090                                 DAG.getConstant(0, DL, SVT));
7091       // Try to turn KAmts into a splat, since we don't care about the values
7092       // that are currently '-1'. If we can't, change them to '0'`s.
7093       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7094                                 DAG.getConstant(0, DL, ShSVT));
7095     }
7096 
7097     PVal = DAG.getBuildVector(VT, DL, PAmts);
7098     AVal = DAG.getBuildVector(VT, DL, AAmts);
7099     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7100     QVal = DAG.getBuildVector(VT, DL, QAmts);
7101   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7102     assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7103            QAmts.size() == 1 &&
7104            "Expected matchUnaryPredicate to return one element for scalable "
7105            "vectors");
7106     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7107     AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7108     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7109     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7110   } else {
7111     assert(isa<ConstantSDNode>(D) && "Expected a constant");
7112     PVal = PAmts[0];
7113     AVal = AAmts[0];
7114     KVal = KAmts[0];
7115     QVal = QAmts[0];
7116   }
7117 
7118   // (mul N, P)
7119   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7120   Created.push_back(Op0.getNode());
7121 
7122   if (NeedToApplyOffset) {
7123     // We need ADD to do this.
7124     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7125       return SDValue();
7126 
7127     // (add (mul N, P), A)
7128     Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7129     Created.push_back(Op0.getNode());
7130   }
7131 
7132   // Rotate right only if any divisor was even. We avoid rotates for all-odd
7133   // divisors as a performance improvement, since rotating by 0 is a no-op.
7134   if (HadEvenDivisor) {
7135     // We need ROTR to do this.
7136     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7137       return SDValue();
7138     // SREM: (rotr (add (mul N, P), A), K)
7139     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7140     Created.push_back(Op0.getNode());
7141   }
7142 
7143   // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7144   SDValue Fold =
7145       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7146                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7147 
7148   // If we didn't have lanes with INT_MIN divisor, then we're done.
7149   if (!HadIntMinDivisor)
7150     return Fold;
7151 
7152   // That fold is only valid for positive divisors. Which effectively means,
7153   // it is invalid for INT_MIN divisors. So if we have such a lane,
7154   // we must fix-up results for said lanes.
7155   assert(VT.isVector() && "Can/should only get here for vectors.");
7156 
7157   // NOTE: we avoid letting illegal types through even if we're before legalize
7158   // ops – legalization has a hard time producing good code for the code that
7159   // follows.
7160   if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7161       !isOperationLegalOrCustom(ISD::AND, VT) ||
7162       !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7163       !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7164     return SDValue();
7165 
7166   Created.push_back(Fold.getNode());
7167 
7168   SDValue IntMin = DAG.getConstant(
7169       APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7170   SDValue IntMax = DAG.getConstant(
7171       APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7172   SDValue Zero =
7173       DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7174 
7175   // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7176   SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7177   Created.push_back(DivisorIsIntMin.getNode());
7178 
7179   // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
7180   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7181   Created.push_back(Masked.getNode());
7182   SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7183   Created.push_back(MaskedIsZero.getNode());
7184 
7185   // To produce final result we need to blend 2 vectors: 'SetCC' and
7186   // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7187   // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7188   // constant-folded, select can get lowered to a shuffle with constant mask.
7189   SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7190                                 MaskedIsZero, Fold);
7191 
7192   return Blended;
7193 }
7194 
7195 bool TargetLowering::
7196 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7197   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7198     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7199                                 "be a constant integer");
7200     return true;
7201   }
7202 
7203   return false;
7204 }
7205 
7206 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7207                                          const DenormalMode &Mode) const {
7208   SDLoc DL(Op);
7209   EVT VT = Op.getValueType();
7210   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7211   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7212 
7213   // This is specifically a check for the handling of denormal inputs, not the
7214   // result.
7215   if (Mode.Input == DenormalMode::PreserveSign ||
7216       Mode.Input == DenormalMode::PositiveZero) {
7217     // Test = X == 0.0
7218     return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7219   }
7220 
7221   // Testing it with denormal inputs to avoid wrong estimate.
7222   //
7223   // Test = fabs(X) < SmallestNormal
7224   const fltSemantics &FltSem = VT.getFltSemantics();
7225   APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7226   SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7227   SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7228   return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7229 }
7230 
/// Return the negated form of \p Op if a profitable negation can be built, or
/// an empty SDValue otherwise. On success, \p Cost reports the cost of the
/// negated form relative to the original expression.
///
/// \param Op         The expression to negate.
/// \param DAG        The DAG in which any new nodes are created.
/// \param LegalOps   If true, restrict the fold to operations that are legal
///                   or custom (we may be past operation legalization).
/// \param OptForSize Passed to isFPImmLegal() when judging negated constants.
/// \param Cost       [out] Relative cost of the returned expression.
/// \param Depth      Current recursion depth; gives up past
///                   SelectionDAG::MaxRecursionDepth.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Helper to delete a node that was speculatively created by a recursive
  // call but ended up unused.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less than or equal to Y's.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less than or equal to Y's.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if we fail to negate Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less than or equal to Y's.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes through these unary operations.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // Preserve the truncation-control operand (operand 1).
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7534 
7535 //===----------------------------------------------------------------------===//
7536 // Legalization Utilities
7537 //===----------------------------------------------------------------------===//
7538 
/// Expand a multiply of type \p VT (ISD::MUL, ISD::UMUL_LOHI or
/// ISD::SMUL_LOHI) into operations on the half-width type \p HiLoVT,
/// appending the half-width pieces of the product to \p Result, low part
/// first (two pieces for MUL, four for the LOHI opcodes). LL/LH/RL/RH may
/// pre-supply the split halves of the operands; either all four or none must
/// be given. Returns false if the target offers none of the required
/// half-width multiplies or the operands cannot be split.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which half-width multiply forms are available (or forced by Kind)?
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit the (Lo, Hi) halves of L * R using whichever half-width multiply
  // the target supports: a single LOHI node, or a MUL plus MULHS/MULHU pair.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Split the low halves off the operands if they were not supplied.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  // Split the high halves off the operands if they were not supplied.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // For plain MUL the top half is just the sum of the cross products
    // added into the high half of LL * RL.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  // Merge reassembles a full-width value from half-width Lo/Hi pieces:
  // zext(Lo) | (zext(Hi) << ShiftAmount).
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Prefer the glue-based carry chain (ADDC/ADDE) when the target has it;
  // otherwise fall back to UADDO_CARRY with an explicit boolean carry.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Correct the unsigned product for negative operands: when an operand's
    // high half is negative, subtract the other operand's low half.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
7713 
7714 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7715                                SelectionDAG &DAG, MulExpansionKind Kind,
7716                                SDValue LL, SDValue LH, SDValue RL,
7717                                SDValue RH) const {
7718   SmallVector<SDValue, 2> Result;
7719   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7720                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7721                            DAG, Kind, LL, LH, RL, RH);
7722   if (Ok) {
7723     assert(Result.size() == 2);
7724     Lo = Result[0];
7725     Hi = Result[1];
7726   }
7727   return Ok;
7728 }
7729 
7730 // Optimize unsigned division or remainder by constants for types twice as large
7731 // as a legal VT.
7732 //
7733 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7734 // can be computed
7735 // as:
7736 //   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7737 //   Remainder = Sum % Constant
7738 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7739 //
// For division, we can compute the remainder using the algorithm described
// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
// (1 << BitWidth) to get the quotient.
7744 
7745 // If Constant is even, we can shift right the dividend and the divisor by the
7746 // number of trailing zeros in Constant before applying the remainder algorithm.
7747 // If we're after the quotient, we can subtract this value from the shifted
7748 // dividend and multiply by the multiplicative inverse of the shifted divisor.
7749 // If we want the remainder, we shift the value left by the number of trailing
7750 // zeros and add the bits that were shifted out of the dividend.
/// Expand a UDIV/UREM/UDIVREM with a constant divisor into operations on the
/// half-width type \p HiLoVT, appending the half-width results to \p Result:
/// quotient halves (low first) for division, then remainder halves for
/// remainder. LL/LH may pre-supply the split halves of the dividend; either
/// both or neither must be given. Returns false when the expansion does not
/// apply (signed opcodes, non-constant or too-wide divisor, missing target
/// support for a high multiply, optimizing for size, or divisor <= 1).
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                            SmallVectorImpl<SDValue> &Result,
                                            EVT HiLoVT, SelectionDAG &DAG,
                                            SDValue LL, SDValue LH) const {
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);

  // TODO: Support signed division/remainder.
  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
    return false;
  assert(
      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
      "Unexpected opcode");

  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CN)
    return false;

  APInt Divisor = CN->getAPIntValue();
  unsigned BitWidth = Divisor.getBitWidth();
  unsigned HBitWidth = BitWidth / 2;
  assert(VT.getScalarSizeInBits() == BitWidth &&
         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");

  // Divisor needs to be less than (1 << HBitWidth).
  APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
  if (Divisor.uge(HalfMaxPlus1))
    return false;

  // We depend on the UREM by constant optimization in DAGCombiner that requires
  // high multiply.
  if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
      !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
    return false;

  // Don't expand if optimizing for size.
  if (DAG.shouldOptForSize())
    return false;

  // Early out for 0 or 1 divisors.
  if (Divisor.ule(1))
    return false;

  // If the divisor is even, shift it until it becomes odd.
  unsigned TrailingZeros = 0;
  if (!Divisor[0]) {
    TrailingZeros = Divisor.countr_zero();
    Divisor.lshrInPlace(TrailingZeros);
  }

  SDLoc dl(N);
  SDValue Sum;
  SDValue PartialRem;

  // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
  // then add in the carry.
  // TODO: If we can't split it in half, we might be able to split into 3 or
  // more pieces using a smaller bit width.
  if (HalfMaxPlus1.urem(Divisor).isOne()) {
    assert(!LL == !LH && "Expected both input halves or no input halves!");
    if (!LL)
      std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);

    // Shift the input by the number of TrailingZeros in the divisor. The
    // shifted out bits will be added to the remainder later.
    if (TrailingZeros) {
      // Save the shifted off bits if we need the remainder.
      if (Opcode != ISD::UDIV) {
        APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
        PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
                                 DAG.getConstant(Mask, dl, HiLoVT));
      }

      // Funnel-shift the dividend right across both halves.
      LL = DAG.getNode(
          ISD::OR, dl, HiLoVT,
          DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
                      DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
          DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
                      DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
                                                 HiLoVT, dl)));
      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
    }

    // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
    EVT SetCCType =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
    if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
      SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
      Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
      Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
                        DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
    } else {
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
      // Unsigned wrap happened iff the sum is less than an addend.
      SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
      // If the boolean for the target is 0 or 1, we can add the setcc result
      // directly.
      if (getBooleanContents(HiLoVT) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
        Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
      else
        Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
                              DAG.getConstant(0, dl, HiLoVT));
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
    }
  }

  // If we didn't find a sum, we can't do the expansion.
  if (!Sum)
    return false;

  // Perform a HiLoVT urem on the Sum using truncated divisor.
  SDValue RemL =
      DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
                  DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
  SDValue RemH = DAG.getConstant(0, dl, HiLoVT);

  if (Opcode != ISD::UREM) {
    // Subtract the remainder from the shifted dividend.
    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);

    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);

    // Multiply by the multiplicative inverse of the divisor modulo
    // (1 << BitWidth).
    APInt MulFactor = Divisor.multiplicativeInverse();

    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
                                   DAG.getConstant(MulFactor, dl, VT));

    // Split the quotient into low and high parts.
    SDValue QuotL, QuotH;
    std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
    Result.push_back(QuotL);
    Result.push_back(QuotH);
  }

  if (Opcode != ISD::UDIV) {
    // If we shifted the input, shift the remainder left and add the bits we
    // shifted off the input.
    if (TrailingZeros) {
      APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
      RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
    }
    Result.push_back(RemL);
    Result.push_back(DAG.getConstant(0, dl, HiLoVT));
  }

  return true;
}
7904 
7905 // Check that (every element of) Z is undef or not an exact multiple of BW.
7906 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7907   return ISD::matchUnaryPredicate(
7908       Z,
7909       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7910       true);
7911 }
7912 
// Expand ISD::VP_FSHL/ISD::VP_FSHR into VP shift/rem/and/or nodes, threading
// the vector-predication mask and explicit vector length through every node
// created.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);
  SDValue Mask = Node->getOperand(3); // VP lane mask
  SDValue VL = Node->getOperand(4);   // VP explicit vector length

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    // The extra shift-by-one keeps every shift amount strictly below BW even
    // when Z % BW == 0, so no individual shift is out of range.
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  // Combine the two partial shifts into the funnel-shift result.
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
7969 
// Expand ISD::FSHL/ISD::FSHR into more primitive shift/and/or/sub nodes, or
// into the opposite-direction funnel shift when that one is better supported
// by the target. Returns an empty SDValue when no expansion is possible.
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  // Vector-predicated funnel shifts carry mask/EVL operands and are expanded
  // by a dedicated helper.
  if (Node->isVPOpcode())
    return expandVPFunnelShift(Node, DAG);

  EVT VT = Node->getValueType(0);

  // Only expand vector types if all of the required vector bit operations are
  // available; otherwise bail out and let the caller legalize differently.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      // NOTE(review): this SUB is created with result type VT even though both
      // operands have type ShVT; the two only agree when the shift-amount type
      // matches the result type -- confirm whether this should be ShVT.
      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
    } else {
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    // The shift-by-one split keeps every shift amount strictly below BW even
    // when Z % BW == 0, avoiding an out-of-range single shift.
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  // Combine the two partial shifts into the funnel-shift result.
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
8057 
// TODO: Merge with expandFunnelShift.
// Expand ISD::ROTL/ISD::ROTR into shift/and/or/sub nodes, or into the rotate
// of the opposite direction when that one is better supported by the target.
// Returns an empty SDValue when no expansion is possible.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0); // value being rotated
  SDValue Op1 = Node->getOperand(1); // rotate amount
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it.
  // rot(l/r) x, c is rewritten as rot(r/l) x, -c; for a power-of-2 width the
  // negation is congruent modulo the bit width.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  // Unless the caller explicitly allows it, only expand vector rotates when
  // all of the required vector bit operations are available.
  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  // ShOpc shifts in the rotate direction; HsOpc recovers the bits that wrap
  // around from the other end.
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    // The extra shift-by-one keeps the second shift amount below w even when
    // c % w == 0.
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  // OR the two halves back together into the rotated value.
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
8113 
// Expand a SHL_PARTS/SRL_PARTS/SRA_PARTS double-word shift of the {Lo, Hi}
// pair into a funnel shift plus a plain shift, selecting between the two
// candidate results when the shift amount is >= the width of one part. The
// two result halves are returned through the Lo/Hi out-parameters.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Fill value for the half that is shifted entirely out: for SRA_PARTS it is
  // the sign bit of the high part smeared across the word, otherwise zero.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2: the half combining bits of both parts (valid for amounts < VTBits,
  // computed via a funnel shift). Tmp3: the half produced by a single shift.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8165 
// Expand FP_TO_SINT via integer bit manipulation of the float representation.
// Only handles non-strict f32 -> i64. On success the expanded value is placed
// in Result and true is returned; otherwise returns false.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // For strict FP nodes the value operand follows the chain operand.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8 exponent bits starting at
  // bit 23 (biased by 127) and 23 mantissa bits.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw integer bits.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign becomes all-ones for negative inputs and zero otherwise (arithmetic
  // shift of the isolated sign bit), sign-extended to the destination width.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // R = mantissa with the implicit leading 1 (bit 23) restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand: shift left when the exponent exceeds the mantissa
  // width, otherwise shift right.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, so the result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8236 
// Expand FP_TO_UINT in terms of FP_TO_SINT plus range adjustment. On success
// Result (and, for strict nodes, Chain) are set and true is returned.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // For strict FP nodes the value operand follows the chain operand.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst holds the destination sign-mask value (e.g. 2^63 for i64) as a float.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  // Sel is true when Src is small enough for FP_TO_SINT to convert directly.
  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8338 
// Expand UINT_TO_FP (currently only i64 -> f64) using the bit-level algorithm
// from compiler-rt's __floatundidf. On success Result is set and true is
// returned; Chain is unused because strict nodes are rejected up front.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  // If the input is known to be non-negative and SINT_TO_FP is legal then use
  // it.
  if (Node->getFlags().hasNonNeg() &&
      isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
    Result =
        DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
    return true;
  }

  // Only the i64 -> f64 case is handled below.
  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit
  // operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt.  This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will
  // produce -0.0. This will be added to +0.0 and produce -0.0 which is
  // incorrect.
  // TwoP52/TwoP84 are the raw double bit patterns of 2^52 and 2^84: OR-ing the
  // low 32 input bits into TwoP52 constructs the double 2^52 + Lo exactly, and
  // OR-ing the high 32 bits into TwoP84 constructs 2^84 + Hi*2^32. Subtracting
  // TwoP84PlusTwoP52 (= 2^84 + 2^52) removes both biases in one operation.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
8398 
8399 SDValue
8400 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8401                                                SelectionDAG &DAG) const {
8402   unsigned Opcode = Node->getOpcode();
8403   assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8404           Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8405          "Wrong opcode");
8406 
8407   if (Node->getFlags().hasNoNaNs()) {
8408     ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8409     EVT VT = Node->getValueType(0);
8410     if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8411          !isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
8412         VT.isVector())
8413       return SDValue();
8414     SDValue Op1 = Node->getOperand(0);
8415     SDValue Op2 = Node->getOperand(1);
8416     SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8417     // Copy FMF flags, but always set the no-signed-zeros flag
8418     // as this is implied by the FMINNUM/FMAXNUM semantics.
8419     SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
8420     return SelCC;
8421   }
8422 
8423   return SDValue();
8424 }
8425 
// Expand FMINNUM/FMAXNUM in terms of FMINNUM_IEEE/FMAXNUM_IEEE,
// FMINIMUM/FMAXIMUM, or a compare+select, whichever the target supports.
// Returns an empty SDValue when no expansion applies.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(0);
  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT))
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
  }

  // Last resort: compare+select, which is only valid under the no-NaNs flag
  // (the helper returns an empty SDValue otherwise).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8481 
// Expand FMINIMUM/FMAXIMUM (NaN-propagating, and -0.0 ordered less than +0.0)
// from a NaN-ignoring min/max or compare+select, followed by explicit NaN and
// signed-zero fixups where they might be observable.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
  } else {
    // The select fallback needs a legal vector select; otherwise scalarize.
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
                                        APFloat::getNaN(VT.getFltSemantics()));
    // If the operands compare unordered (at least one NaN), produce NaN.
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
    // Only when the computed result compares equal to 0.0 can the sign of zero
    // matter; in that case prefer whichever operand is the "winning" zero
    // (+0.0 for max, -0.0 for min) over the computed result.
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
  }

  return MinMax;
}
8548 
/// Expand ISD::FMINIMUMNUM / ISD::FMAXIMUMNUM (IEEE-754-2019 minimumNumber /
/// maximumNumber semantics: a NaN operand loses against a number, and -0.0 is
/// treated as less than +0.0) in terms of related min/max operations the
/// target does support, falling back to a manual select-based expansion.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = Node->getFlags();

  // FMINNUM_IEEE/FMAXNUM_IEEE differ from the requested semantics only in
  // sNaN handling, which the canonicalizations below take care of.
  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(LHS)) {
        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(RHS)) {
        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
      }
    }

    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
  // same behaviors for all of other cases: +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2019Op, VT))
      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
  }

  // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
       DAG.isKnownNeverZeroFloat(RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(IEEE2008Op, VT))
      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
  }

  // The manual expansion below is built on selects; scalarize if the target
  // has no vector select.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // If only one operand is NaN, override it with another operand.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
  }

  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
  // If MinMax is NaN, let's quiet it.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
      !DAG.isKnownNeverNaN(RHS)) {
    MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
  }

  // Fixup signed zero behavior.
  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
    return MinMax;
  }
  // The compare above treats -0.0 and +0.0 as equal; if the result is zero,
  // prefer whichever operand is the wanted signed zero (+0.0 for max, -0.0
  // for min).
  SDValue TestZero =
      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
  SDValue LCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
      MinMax, Flags);
  SDValue RCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
      Flags);
  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
}
8636 
8637 /// Returns a true value if if this FPClassTest can be performed with an ordered
8638 /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8639 /// std::nullopt if it cannot be performed as a compare with 0.
8640 static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8641                                            const fltSemantics &Semantics,
8642                                            const MachineFunction &MF) {
8643   FPClassTest OrderedMask = Test & ~fcNan;
8644   FPClassTest NanTest = Test & fcNan;
8645   bool IsOrdered = NanTest == fcNone;
8646   bool IsUnordered = NanTest == fcNan;
8647 
8648   // Skip cases that are testing for only a qnan or snan.
8649   if (!IsOrdered && !IsUnordered)
8650     return std::nullopt;
8651 
8652   if (OrderedMask == fcZero &&
8653       MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8654     return IsOrdered;
8655   if (OrderedMask == (fcZero | fcSubnormal) &&
8656       MF.getDenormalMode(Semantics).inputsAreZero())
8657     return IsOrdered;
8658   return std::nullopt;
8659 }
8660 
/// Expand an ISD::IS_FPCLASS test of \p Op against \p OrigTestMask. First
/// tries to lower the test to floating-point compares (only valid when FP
/// exceptions can be ignored), then falls back to bit tests on the integer
/// representation of the value, OR-ing together one partial result per
/// requested class.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x87 80-bit has an explicit integer bit, which needs special handling below.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    // Prefer testing the complement if its mask is simpler; the final compare
    // is then emitted with the inverted condition code instead.
    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(FPTestMask, true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                             : UnorderedCmpOpcode,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
        (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
         (OperandVT.isVector() &&
          isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
                                          : UnorderedCmpOpcode,
                                OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
          OperandVT);
      return DAG.getSetCC(DL, ResultVT, Op, Inf,
                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                  OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
                            IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(IsFiniteOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(IsNormalOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // OR a partial class test into the accumulated result (if any).
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  // Lazily build (and cache) the "explicit integer bit is set" test for f80.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No partial results means every requested class was folded away above.
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
9008 
9009 // Only expand vector types if we have the appropriate vector bit operations.
9010 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9011   assert(VT.isVector() && "Expected vector type");
9012   unsigned Len = VT.getScalarSizeInBits();
9013   return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9014          TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9015          TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9016          (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9017          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9018 }
9019 
9020 SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9021   SDLoc dl(Node);
9022   EVT VT = Node->getValueType(0);
9023   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9024   SDValue Op = Node->getOperand(0);
9025   unsigned Len = VT.getScalarSizeInBits();
9026   assert(VT.isInteger() && "CTPOP not implemented for this type.");
9027 
9028   // TODO: Add support for irregular type lengths.
9029   if (!(Len <= 128 && Len % 8 == 0))
9030     return SDValue();
9031 
9032   // Only expand vector types if we have the appropriate vector bit operations.
9033   if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9034     return SDValue();
9035 
9036   // This is the "best" algorithm from
9037   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9038   SDValue Mask55 =
9039       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9040   SDValue Mask33 =
9041       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9042   SDValue Mask0F =
9043       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9044 
9045   // v = v - ((v >> 1) & 0x55555555...)
9046   Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9047                    DAG.getNode(ISD::AND, dl, VT,
9048                                DAG.getNode(ISD::SRL, dl, VT, Op,
9049                                            DAG.getConstant(1, dl, ShVT)),
9050                                Mask55));
9051   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9052   Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9053                    DAG.getNode(ISD::AND, dl, VT,
9054                                DAG.getNode(ISD::SRL, dl, VT, Op,
9055                                            DAG.getConstant(2, dl, ShVT)),
9056                                Mask33));
9057   // v = (v + (v >> 4)) & 0x0F0F0F0F...
9058   Op = DAG.getNode(ISD::AND, dl, VT,
9059                    DAG.getNode(ISD::ADD, dl, VT, Op,
9060                                DAG.getNode(ISD::SRL, dl, VT, Op,
9061                                            DAG.getConstant(4, dl, ShVT))),
9062                    Mask0F);
9063 
9064   if (Len <= 8)
9065     return Op;
9066 
9067   // Avoid the multiply if we only have 2 bytes to add.
9068   // TODO: Only doing this for scalars because vectors weren't as obviously
9069   // improved.
9070   if (Len == 16 && !VT.isVector()) {
9071     // v = (v + (v >> 8)) & 0x00FF;
9072     return DAG.getNode(ISD::AND, dl, VT,
9073                      DAG.getNode(ISD::ADD, dl, VT, Op,
9074                                  DAG.getNode(ISD::SRL, dl, VT, Op,
9075                                              DAG.getConstant(8, dl, ShVT))),
9076                      DAG.getConstant(0xFF, dl, VT));
9077   }
9078 
9079   // v = (v * 0x01010101...) >> (Len - 8)
9080   SDValue V;
9081   if (isOperationLegalOrCustomOrPromote(
9082           ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9083     SDValue Mask01 =
9084         DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9085     V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9086   } else {
9087     V = Op;
9088     for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9089       SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9090       V = DAG.getNode(ISD::ADD, dl, VT, V,
9091                       DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9092     }
9093   }
9094   return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9095 }
9096 
9097 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9098   SDLoc dl(Node);
9099   EVT VT = Node->getValueType(0);
9100   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9101   SDValue Op = Node->getOperand(0);
9102   SDValue Mask = Node->getOperand(1);
9103   SDValue VL = Node->getOperand(2);
9104   unsigned Len = VT.getScalarSizeInBits();
9105   assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9106 
9107   // TODO: Add support for irregular type lengths.
9108   if (!(Len <= 128 && Len % 8 == 0))
9109     return SDValue();
9110 
9111   // This is same algorithm of expandCTPOP from
9112   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9113   SDValue Mask55 =
9114       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9115   SDValue Mask33 =
9116       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9117   SDValue Mask0F =
9118       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9119 
9120   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9121 
9122   // v = v - ((v >> 1) & 0x55555555...)
9123   Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9124                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9125                                  DAG.getConstant(1, dl, ShVT), Mask, VL),
9126                      Mask55, Mask, VL);
9127   Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9128 
9129   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9130   Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9131   Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9132                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9133                                  DAG.getConstant(2, dl, ShVT), Mask, VL),
9134                      Mask33, Mask, VL);
9135   Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9136 
9137   // v = (v + (v >> 4)) & 0x0F0F0F0F...
9138   Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9139                      Mask, VL),
9140   Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9141   Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9142 
9143   if (Len <= 8)
9144     return Op;
9145 
9146   // v = (v * 0x01010101...) >> (Len - 8)
9147   SDValue V;
9148   if (isOperationLegalOrCustomOrPromote(
9149           ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9150     SDValue Mask01 =
9151         DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9152     V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9153   } else {
9154     V = Op;
9155     for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9156       SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9157       V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9158                       DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9159                       Mask, VL);
9160     }
9161   }
9162   return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9163                      Mask, VL);
9164 }
9165 
9166 SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9167   SDLoc dl(Node);
9168   EVT VT = Node->getValueType(0);
9169   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9170   SDValue Op = Node->getOperand(0);
9171   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9172 
9173   // If the non-ZERO_UNDEF version is supported we can use that instead.
9174   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9175       isOperationLegalOrCustom(ISD::CTLZ, VT))
9176     return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9177 
9178   // If the ZERO_UNDEF version is supported use that and handle the zero case.
9179   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
9180     EVT SetCCVT =
9181         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9182     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9183     SDValue Zero = DAG.getConstant(0, dl, VT);
9184     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9185     return DAG.getSelect(dl, VT, SrcIsZero,
9186                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9187   }
9188 
9189   // Only expand vector types if we have the appropriate vector bit operations.
9190   // This includes the operations needed to expand CTPOP if it isn't supported.
9191   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9192                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9193                          !canExpandVectorCTPOP(*this, VT)) ||
9194                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
9195                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
9196     return SDValue();
9197 
9198   // for now, we do this:
9199   // x = x | (x >> 1);
9200   // x = x | (x >> 2);
9201   // ...
9202   // x = x | (x >>16);
9203   // x = x | (x >>32); // for 64-bit input
9204   // return popcount(~x);
9205   //
9206   // Ref: "Hacker's Delight" by Henry Warren
9207   for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9208     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9209     Op = DAG.getNode(ISD::OR, dl, VT, Op,
9210                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9211   }
9212   Op = DAG.getNOT(dl, Op, VT);
9213   return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9214 }
9215 
9216 SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9217   SDLoc dl(Node);
9218   EVT VT = Node->getValueType(0);
9219   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9220   SDValue Op = Node->getOperand(0);
9221   SDValue Mask = Node->getOperand(1);
9222   SDValue VL = Node->getOperand(2);
9223   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9224 
9225   // do this:
9226   // x = x | (x >> 1);
9227   // x = x | (x >> 2);
9228   // ...
9229   // x = x | (x >>16);
9230   // x = x | (x >>32); // for 64-bit input
9231   // return popcount(~x);
9232   for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9233     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9234     Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9235                      DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9236                      VL);
9237   }
9238   Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9239                    Mask, VL);
9240   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9241 }
9242 
/// Lower CTTZ/CTTZ_ZERO_UNDEF via a de Bruijn multiply-and-lookup:
/// isolate the lowest set bit with (x & -x), multiply by a de Bruijn
/// constant so the top bits index a constant-pool table that maps back to
/// the trailing-zero count. Only 32- and 64-bit widths are supported.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  // De Bruijn sequences: every window of log2(BitWidth) top bits after the
  // multiply is unique per power of two.
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // (Op & -Op) isolates the lowest set bit; the multiply then places a
  // unique table index in the top Log2_32(BitWidth) bits.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse mapping: table[index produced by bit i] = i.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  // The table lookup yields an arbitrary value for a zero input, which is
  // exactly what CTTZ_ZERO_UNDEF permits.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // Plain CTTZ must return BitWidth for a zero input; select it explicitly.
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9286 
9287 SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9288   SDLoc dl(Node);
9289   EVT VT = Node->getValueType(0);
9290   SDValue Op = Node->getOperand(0);
9291   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9292 
9293   // If the non-ZERO_UNDEF version is supported we can use that instead.
9294   if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9295       isOperationLegalOrCustom(ISD::CTTZ, VT))
9296     return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9297 
9298   // If the ZERO_UNDEF version is supported use that and handle the zero case.
9299   if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
9300     EVT SetCCVT =
9301         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9302     SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9303     SDValue Zero = DAG.getConstant(0, dl, VT);
9304     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9305     return DAG.getSelect(dl, VT, SrcIsZero,
9306                          DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9307   }
9308 
9309   // Only expand vector types if we have the appropriate vector bit operations.
9310   // This includes the operations needed to expand CTPOP if it isn't supported.
9311   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9312                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9313                          !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
9314                          !canExpandVectorCTPOP(*this, VT)) ||
9315                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
9316                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
9317                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9318     return SDValue();
9319 
9320   // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9321   if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9322       !isOperationLegal(ISD::CTLZ, VT))
9323     if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9324       return V;
9325 
9326   // for now, we use: { return popcount(~x & (x - 1)); }
9327   // unless the target has ctlz but not ctpop, in which case we use:
9328   // { return 32 - nlz(~x & (x-1)); }
9329   // Ref: "Hacker's Delight" by Henry Warren
9330   SDValue Tmp = DAG.getNode(
9331       ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9332       DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9333 
9334   // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9335   if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
9336     return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9337                        DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9338   }
9339 
9340   return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9341 }
9342 
9343 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9344   SDValue Op = Node->getOperand(0);
9345   SDValue Mask = Node->getOperand(1);
9346   SDValue VL = Node->getOperand(2);
9347   SDLoc dl(Node);
9348   EVT VT = Node->getValueType(0);
9349 
9350   // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9351   SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9352                             DAG.getAllOnesConstant(dl, VT), Mask, VL);
9353   SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9354                                  DAG.getConstant(1, dl, VT), Mask, VL);
9355   SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9356   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9357 }
9358 
9359 SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9360                                              SelectionDAG &DAG) const {
9361   // %cond = to_bool_vec %source
9362   // %splat = splat /*val=*/VL
9363   // %tz = step_vector
9364   // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9365   // %r = vp.reduce.umin %v
9366   SDLoc DL(N);
9367   SDValue Source = N->getOperand(0);
9368   SDValue Mask = N->getOperand(1);
9369   SDValue EVL = N->getOperand(2);
9370   EVT SrcVT = Source.getValueType();
9371   EVT ResVT = N->getValueType(0);
9372   EVT ResVecVT =
9373       EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9374 
9375   // Convert to boolean vector.
9376   if (SrcVT.getScalarType() != MVT::i1) {
9377     SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9378     SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9379                              SrcVT.getVectorElementCount());
9380     Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9381                          DAG.getCondCode(ISD::SETNE), Mask, EVL);
9382   }
9383 
9384   SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9385   SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9386   SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9387   SDValue Select =
9388       DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9389   return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9390 }
9391 
9392 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9393                                   bool IsNegative) const {
9394   SDLoc dl(N);
9395   EVT VT = N->getValueType(0);
9396   SDValue Op = N->getOperand(0);
9397 
9398   // abs(x) -> smax(x,sub(0,x))
9399   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9400       isOperationLegal(ISD::SMAX, VT)) {
9401     SDValue Zero = DAG.getConstant(0, dl, VT);
9402     Op = DAG.getFreeze(Op);
9403     return DAG.getNode(ISD::SMAX, dl, VT, Op,
9404                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9405   }
9406 
9407   // abs(x) -> umin(x,sub(0,x))
9408   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9409       isOperationLegal(ISD::UMIN, VT)) {
9410     SDValue Zero = DAG.getConstant(0, dl, VT);
9411     Op = DAG.getFreeze(Op);
9412     return DAG.getNode(ISD::UMIN, dl, VT, Op,
9413                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9414   }
9415 
9416   // 0 - abs(x) -> smin(x, sub(0,x))
9417   if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9418       isOperationLegal(ISD::SMIN, VT)) {
9419     SDValue Zero = DAG.getConstant(0, dl, VT);
9420     Op = DAG.getFreeze(Op);
9421     return DAG.getNode(ISD::SMIN, dl, VT, Op,
9422                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9423   }
9424 
9425   // Only expand vector types if we have the appropriate vector operations.
9426   if (VT.isVector() &&
9427       (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9428        (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9429        (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9430        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9431     return SDValue();
9432 
9433   Op = DAG.getFreeze(Op);
9434   SDValue Shift = DAG.getNode(
9435       ISD::SRA, dl, VT, Op,
9436       DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9437   SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9438 
9439   // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9440   if (!IsNegative)
9441     return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9442 
9443   // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9444   return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9445 }
9446 
// Expand ISD::ABDS / ISD::ABDU (signed/unsigned absolute difference),
// trying a sequence of progressively more general strategies.
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // Freeze both operands: each is used more than once below, and poison
  // must resolve to the same value at every use.
  SDValue LHS = DAG.getFreeze(N->getOperand(0));
  SDValue RHS = DAG.getFreeze(N->getOperand(1));
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  // (one of the two saturating subtractions is always zero).
  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
    return DAG.getNode(ISD::OR, dl, VT,
                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));

  // If the subtract doesn't overflow then just use abs(sub())
  // NOTE: don't use frozen operands for value tracking.
  bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
                       DAG.SignBitIsZero(N->getOperand(0));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
                             N->getOperand(1)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
                             N->getOperand(0)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));

  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  // (all-ones Cmp conditionally negates Diff via xor/sub; zero Cmp is a
  // no-op, yielding |lhs - rhs| without a select).
  if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
  }

  // Similar to the branchless expansion, use the (sign-extended) usubo overflow
  // flag if the (scalar) type is illegal as this is more likely to legalize
  // cleanly:
  // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue USubO =
        DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
    SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
}
9520 
// Expand the four averaging nodes (AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU),
// which compute (lhs + rhs) >> 1 without overflowing, rounding down (FLOOR)
// or up (CEIL), with signed or unsigned shift semantics.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  // Opcode selections for the generic and/or/xor/shift expansion at the end.
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // (>= 2 sign bits / >= 1 leading zero means lhs + rhs cannot overflow.)
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
       DAG.ComputeNumSignBits(RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
    // CEIL rounds up: add 1 before shifting right.
    if (!IsFloor)
      Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
    return DAG.getNode(ShiftOpc, dl, VT, Sum,
                       DAG.getShiftAmountConstant(1, VT, dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    // Widen to double width so the add cannot overflow.
    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
    if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
      if (!IsFloor)
        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
                          DAG.getConstant(1, dl, ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
                        DAG.getShiftAmountConstant(1, ExtVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  // The carry-out of UADDO is the lost top bit of the sum; shift it back
  // into the MSB of the halved result.
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue UAddWithOverflow =
        DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(0);
    SDValue Overflow = UAddWithOverflow.getValue(1);

    // Right shift the sum by 1
    SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
                                  DAG.getShiftAmountConstant(1, VT, dl));

    SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
    SDValue OverflowShl = DAG.getNode(
        ISD::SHL, dl, VT, ZeroExtOverflow,
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));

    return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
  }

  // Generic overflow-free expansion:
  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Freeze: both operands are used twice, so poison must be consistent.
  LHS = DAG.getFreeze(LHS);
  RHS = DAG.getFreeze(RHS);
  SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
  SDValue Shift =
      DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
  return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
}
9602 
// Expand ISD::BSWAP into shifts, masks and ors. Supports i16/i32/i64 scalar
// element types; returns an empty SDValue for anything else.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // TmpN holds byte N of the result (1-based, from LSB):
    // Tmp4 = byte0 << 24, Tmp3 = byte1 << 8, Tmp2 = byte2 >> 8 (masked),
    // Tmp1 = byte3 >> 24; OR them together.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same scheme with eight bytes: the low four bytes are masked then
    // shifted left into the high half, the high four bytes are shifted
    // right then masked, and everything is OR'd back together in a tree.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
9660 
// Expand ISD::VP_BSWAP into predicated shifts, masks and ors — the same
// byte-shuffle scheme as expandBSWAP, with every node carrying Mask/EVL.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // No VP rotate: swap the two bytes with an explicit shl/srl/or.
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    // TmpN holds byte N of the result (1-based, from LSB); see expandBSWAP.
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    // Eight-byte variant: mask-then-shift the low bytes up, shift-then-mask
    // the high bytes down, and OR the pieces together in a tree.
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
9736 
// Expand ISD::BITREVERSE. For power-of-two widths >= 8 use a BSWAP followed
// by an in-byte swap network (nibbles, then bit-pairs, then single bits);
// otherwise fall back to moving each bit individually.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Fallback for odd widths: shift each source bit I to destination bit
  // J = Sz-1-I, isolate it with a one-bit mask, and OR everything together.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    APInt Shift = APInt::getOneBitSet(Sz, J);
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
9797 
// Expand ISD::VP_BITREVERSE using the same BSWAP + in-byte swap network as
// expandBITREVERSE, with every node predicated on Mask/EVL. Unlike the
// non-VP version there is no per-bit fallback, so non-power-of-two widths
// return an empty SDValue.
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    return Tmp;
  }
  // No bit-at-a-time fallback in the VP path.
  return SDValue();
}
9859 
// Turn a vector load into a sequence of scalar operations. Returns the
// reassembled vector value and the new chain. Byte-sized elements become one
// scalar load per element; sub-byte elements are handled by loading the whole
// vector as one integer and extracting each element with shift/mask.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  // Scalarization needs a fixed element count.
  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    // Mask covering the low SrcEltBits of an extracted element.
    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    // Extract each element: shift it to the bottom, mask, truncate, and
    // apply the load's extension kind if any.
    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 sits in the high bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    // Only one memory operation, so its chain result is the new chain.
    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one (possibly extending) scalar load per
  // element at increasing offsets.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge all the per-element load chains into one.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
9949 
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  // Lower a vector store by decomposing it into scalar operations: either a
  // single integer store assembled from the elements (when the in-memory
  // element type is not byte-sized) or one truncating scalar store per
  // element. Returns the chain of the resulting store(s).
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // Scalable vectors have no compile-time element count to iterate over.
  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // OR each truncated element into its bit position within the integer.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most-significant bits,
      // so the shift amount is mirrored.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // Emit one plain integer store of the assembled value, reusing the
    // original store's pointer info, alignment, flags and AA info.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The element stores are independent of each other; merge their chains.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
10026 
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  // Expand an unaligned load into operations the target can handle. Three
  // strategies are used below, in order of preference:
  //   1. FP/vector loads with a legal same-sized integer type: load as an
  //      integer (possibly scalarized) and bitcast.
  //   2. Otherwise: copy the data through an aligned stack slot using
  //      register-width integer pieces, then load from the slot.
  //   3. Plain integer loads: split into two half-width loads and combine.
  // Returns the (value, chain) pair callers wrap in a MERGE_VALUES.
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up: the last piece may be partial.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    // (Loop starts at 1 so exactly NumRegs - 1 full-width copies are made.)
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low half is always zero-extended so
  // that ORing it with the shifted high half reconstructs the value; the
  // extension kind of the original load is applied to the high half only.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    // Big-endian: the high half lives at the lower address.
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: (Hi << NumBits) | Lo
  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
10176 
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  // Expand an unaligned store, mirroring expandUnalignedLoad:
  //   1. FP/vector stores with a legal same-sized integer type: bitcast and
  //      emit a (misaligned) integer store, or scalarize.
  //   2. Otherwise: store to an aligned stack slot, then copy to the real
  //      destination in register-width integer pieces.
  //   3. Plain integer stores: split into two half-width truncating stores.
  // Returns the chain of the emitted store(s).
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up: the last piece may be partial.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    // (Loop starts at 1 so exactly NumRegs - 1 full-width copies are made.)
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts.  On little-endian targets the low half goes to the
  // lower address; big-endian targets store the high half first.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
10309 
10310 SDValue
10311 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10312                                        const SDLoc &DL, EVT DataVT,
10313                                        SelectionDAG &DAG,
10314                                        bool IsCompressedMemory) const {
10315   SDValue Increment;
10316   EVT AddrVT = Addr.getValueType();
10317   EVT MaskVT = Mask.getValueType();
10318   assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10319          "Incompatible types of Data and Mask");
10320   if (IsCompressedMemory) {
10321     if (DataVT.isScalableVector())
10322       report_fatal_error(
10323           "Cannot currently handle compressed memory with scalable vectors");
10324     // Incrementing the pointer according to number of '1's in the mask.
10325     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10326     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10327     if (MaskIntVT.getSizeInBits() < 32) {
10328       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10329       MaskIntVT = MVT::i32;
10330     }
10331 
10332     // Count '1's with POPCNT.
10333     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10334     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10335     // Scale is an element size in bytes.
10336     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10337                                     AddrVT);
10338     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10339   } else if (DataVT.isScalableVector()) {
10340     Increment = DAG.getVScale(DL, AddrVT,
10341                               APInt(AddrVT.getFixedSizeInBits(),
10342                                     DataVT.getStoreSize().getKnownMinValue()));
10343   } else
10344     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10345 
10346   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10347 }
10348 
10349 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10350                                        EVT VecVT, const SDLoc &dl,
10351                                        ElementCount SubEC) {
10352   assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10353          "Cannot index a scalable vector within a fixed-width vector");
10354 
10355   unsigned NElts = VecVT.getVectorMinNumElements();
10356   unsigned NumSubElts = SubEC.getKnownMinValue();
10357   EVT IdxVT = Idx.getValueType();
10358 
10359   if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10360     // If this is a constant index and we know the value plus the number of the
10361     // elements in the subvector minus one is less than the minimum number of
10362     // elements then it's safe to return Idx.
10363     if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10364       if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10365         return Idx;
10366     SDValue VS =
10367         DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10368     unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10369     SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10370                               DAG.getConstant(NumSubElts, dl, IdxVT));
10371     return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10372   }
10373   if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10374     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10375     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10376                        DAG.getConstant(Imm, dl, IdxVT));
10377   }
10378   unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10379   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10380                      DAG.getConstant(MaxIndex, dl, IdxVT));
10381 }
10382 
10383 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10384                                                 SDValue VecPtr, EVT VecVT,
10385                                                 SDValue Index) const {
10386   return getVectorSubVecPointer(
10387       DAG, VecPtr, VecVT,
10388       EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10389       Index);
10390 }
10391 
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  // Compute the in-memory address of the subvector of type SubVecVT that
  // starts at element Index of the vector at VecPtr.  The index is clamped
  // to stay within the vector's bounds before being scaled to bytes.
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  // For a scalable subvector, each index step covers vscale elements,
  // so scale the clamped index by vscale first.
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  // Convert the element index into a byte offset.
  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
10421 
10422 //===----------------------------------------------------------------------===//
10423 // Implementation of Emulated TLS Model
10424 //===----------------------------------------------------------------------===//
10425 
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  // Strip casts/aliases so the name of the underlying TLS global is used to
  // form the "__emutls_v.<name>" control-variable name.
  const GlobalValue *GV =
      cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  // The control variable is expected to have been created already
  // (by the emulated-TLS lowering of the global itself).
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Build and lower the runtime call; its result is the TLS address.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10465 
10466 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10467                                                 SelectionDAG &DAG) const {
10468   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10469   if (!isCtlzFast())
10470     return SDValue();
10471   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10472   SDLoc dl(Op);
10473   if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10474     EVT VT = Op.getOperand(0).getValueType();
10475     SDValue Zext = Op.getOperand(0);
10476     if (VT.bitsLT(MVT::i32)) {
10477       VT = MVT::i32;
10478       Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10479     }
10480     unsigned Log2b = Log2_32(VT.getSizeInBits());
10481     SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10482     SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10483                               DAG.getConstant(Log2b, dl, MVT::i32));
10484     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10485   }
10486   return SDValue();
10487 }
10488 
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  // Expand S/UMIN and S/UMAX.  A few arithmetic shortcuts are tried first;
  // the general fallback is SETCC + SELECT, preferring a condition code for
  // which an identical SETCC node already exists so it can be CSE'd.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    // Freeze Op0 since it is used twice (in the SUB and in the SETCC).
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  // PrefCC/AltCC select (Op0, Op1); the commuted codes select (Op1, Op0).
  // If no existing SETCC matches, fall back to a fresh PrefCC comparison.
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10569 
// Expand a saturating add/sub node ([SU]ADDSAT / [SU]SUBSAT) into operations
// the target supports: a umin/umax based form for the unsigned cases when
// those ops are legal, otherwise the matching overflow-reporting arithmetic
// node plus a select (or mask) of the saturation value.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to the overflow-reporting opcode used below to
  // detect when the wrapped result must be replaced by the saturation value.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  // Result value 0 is the wrapped sum/difference, value 1 the overflow flag.
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      // With sign-extended booleans the overflow flag extends to an all-ones
      // mask, so ORing it in saturates to UINT_MAX without needing a select.
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      // Dual of the UADDSAT trick: AND with the inverted mask clamps the
      // underflowed difference to 0.
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  // On signed overflow the wrapped result has the opposite sign of the true
  // result, so broadcasting its sign bit (SRA by BW-1) and XORing with
  // SIGNED_MIN yields SIGNED_MAX for positive overflow and SIGNED_MIN for
  // negative overflow.
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10685 
10686 SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10687   unsigned Opcode = Node->getOpcode();
10688   SDValue LHS = Node->getOperand(0);
10689   SDValue RHS = Node->getOperand(1);
10690   EVT VT = LHS.getValueType();
10691   EVT ResVT = Node->getValueType(0);
10692   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10693   SDLoc dl(Node);
10694 
10695   auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10696   auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10697   SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10698   SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10699 
10700   // We can't perform arithmetic on i1 values. Extending them would
10701   // probably result in worse codegen, so let's just use two selects instead.
10702   // Some targets are also just better off using selects rather than subtraction
10703   // because one of the conditions can be merged with one of the selects.
10704   // And finally, if we don't know the contents of high bits of a boolean value
10705   // we can't perform any arithmetic either.
10706   if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10707       getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10708     SDValue SelectZeroOrOne =
10709         DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10710                       DAG.getConstant(0, dl, ResVT));
10711     return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10712                          SelectZeroOrOne);
10713   }
10714 
10715   if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10716     std::swap(IsGT, IsLT);
10717   return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10718                             ResVT);
10719 }
10720 
10721 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10722   unsigned Opcode = Node->getOpcode();
10723   bool IsSigned = Opcode == ISD::SSHLSAT;
10724   SDValue LHS = Node->getOperand(0);
10725   SDValue RHS = Node->getOperand(1);
10726   EVT VT = LHS.getValueType();
10727   SDLoc dl(Node);
10728 
10729   assert((Node->getOpcode() == ISD::SSHLSAT ||
10730           Node->getOpcode() == ISD::USHLSAT) &&
10731           "Expected a SHLSAT opcode");
10732   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10733   assert(VT.isInteger() && "Expected operands to be integers");
10734 
10735   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10736     return DAG.UnrollVectorOp(Node);
10737 
10738   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10739 
10740   unsigned BW = VT.getScalarSizeInBits();
10741   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10742   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10743   SDValue Orig =
10744       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10745 
10746   SDValue SatVal;
10747   if (IsSigned) {
10748     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10749     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10750     SDValue Cond =
10751         DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10752     SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10753   } else {
10754     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10755   }
10756   SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10757   return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10758 }
10759 
// Compute the low (Lo) and high (Hi) halves of the WideVT product of two
// operands given as half-width pairs (LH:LL) * (RH:RL). Prefers a MUL
// libcall when one exists for WideVT, otherwise emits an inline expansion
// built from half-width multiplies.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, EVT WideVT,
                                        const SDValue LL, const SDValue LH,
                                        const SDValue RL, const SDValue RH,
                                        SDValue &Lo, SDValue &Hi) const {
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  // Also, we can fall back to a division in some cases, but that's a big
  // performance hit in the general case.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    // We'll expand the multiplication by brute force because we have no other
    // options. This is a trivially-generalized version of the code from
    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
    // 4.3.1).
    EVT VT = LL.getValueType();
    unsigned Bits = VT.getSizeInBits();
    unsigned HalfBits = Bits >> 1;
    // Mask selecting the low half of a VT-sized value.
    SDValue Mask =
        DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
    // Low quarters of each operand (low half of the low word).
    SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
    SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);

    // First partial product LLL*RLL, split into its own low (TL) and high
    // (TH) halves so carries can be propagated into later partials.
    SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
    SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);

    // High quarters of the low words.
    SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
    SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
    SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
    SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);

    // Cross partial LLH*RLL plus the carry from T.
    SDValue U = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
    SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
    SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);

    // Cross partial LLL*RLH plus the low half of U.
    SDValue V = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
    SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);

    // High partial LLH*RLH plus all accumulated carries: the contribution
    // of LL*RL to the high result word.
    SDValue W =
        DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
                    DAG.getNode(ISD::ADD, dl, VT, UH, VH));
    Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
                     DAG.getNode(ISD::SHL, dl, VT, V, Shift));

    // The cross terms RH*LL and RL*LH only affect the high word (their
    // low-word contribution would be shifted out entirely).
    Hi = DAG.getNode(ISD::ADD, dl, VT, W,
                     DAG.getNode(ISD::ADD, dl, VT,
                                 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
                                 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
  } else {
    // Attempt a libcall.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(Signed);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = {LL, LH, RL, RH};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = {LH, LL, RH, RL};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    // Unpack the two result halves, again honouring endianness.
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      Lo = Ret.getOperand(0);
      Hi = Ret.getOperand(1);
    } else {
      Lo = Ret.getOperand(1);
      Hi = Ret.getOperand(0);
    }
  }
}
10848 
10849 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10850                                         bool Signed, const SDValue LHS,
10851                                         const SDValue RHS, SDValue &Lo,
10852                                         SDValue &Hi) const {
10853   EVT VT = LHS.getValueType();
10854   assert(RHS.getValueType() == VT && "Mismatching operand types");
10855 
10856   SDValue HiLHS;
10857   SDValue HiRHS;
10858   if (Signed) {
10859     // The high part is obtained by SRA'ing all but one of the bits of low
10860     // part.
10861     unsigned LoSize = VT.getFixedSizeInBits();
10862     HiLHS = DAG.getNode(
10863         ISD::SRA, dl, VT, LHS,
10864         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10865     HiRHS = DAG.getNode(
10866         ISD::SRA, dl, VT, RHS,
10867         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10868   } else {
10869     HiLHS = DAG.getConstant(0, dl, VT);
10870     HiRHS = DAG.getConstant(0, dl, VT);
10871   }
10872   EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10873   forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10874 }
10875 
// Expand a fixed-point multiply ([SU]MULFIX / [SU]MULFIXSAT): compute the
// double-width product, shift it right by the scale, and (for the SAT
// variants) clamp when the discarded high bits show the result doesn't fit.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  // Scale of zero degenerates to a plain (possibly saturating) multiply.
  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // smul.fix.sat(a, b, 0) via SMULO: on overflow choose SINT_MIN/MAX by
      // the sign the true product would have had.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // umul.fix.sat(a, b, 0) via UMULO: on overflow clamp to UINT_MAX.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
  // Prefer, in order: a combined LOHI op, separate MUL + MULH ops, a legal
  // MUL at double width, and finally (scalars only) the forced expansion.
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    return SDValue();
  } else {
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits the result lives entirely in Lo; overflow is
    // detected by checking Hi against the sign-extension of Lo.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
11032 
// Expand a fixed-point divide ([SU]DIVFIX / [SU]DIVFIXSAT) in the original
// type by pre-shifting the operands so the quotient carries the requested
// scale. Returns SDValue() when the operands don't have enough spare bits to
// absorb the scale, leaving the caller to widen the type instead.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Use as much of the LHS headroom as possible; the RHS covers the rest.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    // The quotient is negative exactly when the operand signs differ; in
    // that case a nonzero remainder means SDIV truncated towards zero and
    // we must subtract one to round towards negative infinity instead.
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
11118 
11119 void TargetLowering::expandUADDSUBO(
11120     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11121   SDLoc dl(Node);
11122   SDValue LHS = Node->getOperand(0);
11123   SDValue RHS = Node->getOperand(1);
11124   bool IsAdd = Node->getOpcode() == ISD::UADDO;
11125 
11126   // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11127   unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11128   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11129     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11130     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11131                                     { LHS, RHS, CarryIn });
11132     Result = SDValue(NodeCarry.getNode(), 0);
11133     Overflow = SDValue(NodeCarry.getNode(), 1);
11134     return;
11135   }
11136 
11137   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11138                             LHS.getValueType(), LHS, RHS);
11139 
11140   EVT ResultType = Node->getValueType(1);
11141   EVT SetCCType = getSetCCResultType(
11142       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11143   SDValue SetCC;
11144   if (IsAdd && isOneConstant(RHS)) {
11145     // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11146     // the live range of X. We assume comparing with 0 is cheap.
11147     // The general case (X + C) < C is not necessarily beneficial. Although we
11148     // reduce the live range of X, we may introduce the materialization of
11149     // constant C.
11150     SetCC =
11151         DAG.getSetCC(dl, SetCCType, Result,
11152                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11153   } else if (IsAdd && isAllOnesConstant(RHS)) {
11154     // Special case: uaddo X, -1 overflows if X != 0.
11155     SetCC =
11156         DAG.getSetCC(dl, SetCCType, LHS,
11157                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11158   } else {
11159     ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11160     SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11161   }
11162   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11163 }
11164 
11165 void TargetLowering::expandSADDSUBO(
11166     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11167   SDLoc dl(Node);
11168   SDValue LHS = Node->getOperand(0);
11169   SDValue RHS = Node->getOperand(1);
11170   bool IsAdd = Node->getOpcode() == ISD::SADDO;
11171 
11172   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11173                             LHS.getValueType(), LHS, RHS);
11174 
11175   EVT ResultType = Node->getValueType(1);
11176   EVT OType = getSetCCResultType(
11177       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11178 
11179   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11180   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11181   if (isOperationLegal(OpcSat, LHS.getValueType())) {
11182     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11183     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11184     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11185     return;
11186   }
11187 
11188   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11189 
11190   // For an addition, the result should be less than one of the operands (LHS)
11191   // if and only if the other operand (RHS) is negative, otherwise there will
11192   // be overflow.
11193   // For a subtraction, the result should be less than one of the operands
11194   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11195   // otherwise there will be overflow.
11196   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11197   SDValue ConditionRHS =
11198       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11199 
11200   Overflow = DAG.getBoolExtOrTrunc(
11201       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11202       ResultType, ResultType);
11203 }
11204 
// Expand [SU]MULO into a multiply that produces the low result plus an
// explicit overflow check, writing the product to Result and the overflow
// predicate to Overflow. Returns false only when no expansion strategy is
// available (vector type with no legal MULH/MUL_LOHI and no legal wide type).
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow occurred iff shifting back out fails to reproduce LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // General case: compute the full 2x-wide product (low + high halves) and
  // derive overflow from the high half.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Per-signedness opcode table: { high-half mul, lo/hi pair mul, extend }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Extend to the wide type, multiply, then split the result into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow: the top half must be the sign-extension of the bottom
    // half (all copies of its sign bit).
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow: any nonzero bit in the top half.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11284 
11285 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11286   SDLoc dl(Node);
11287   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11288   SDValue Op = Node->getOperand(0);
11289   EVT VT = Op.getValueType();
11290 
11291   if (VT.isScalableVector())
11292     report_fatal_error(
11293         "Expanding reductions for scalable vectors is undefined.");
11294 
11295   // Try to use a shuffle reduction for power of two vectors.
11296   if (VT.isPow2VectorType()) {
11297     while (VT.getVectorNumElements() > 1) {
11298       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11299       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11300         break;
11301 
11302       SDValue Lo, Hi;
11303       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11304       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11305       VT = HalfVT;
11306     }
11307   }
11308 
11309   EVT EltVT = VT.getVectorElementType();
11310   unsigned NumElts = VT.getVectorNumElements();
11311 
11312   SmallVector<SDValue, 8> Ops;
11313   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11314 
11315   SDValue Res = Ops[0];
11316   for (unsigned i = 1; i < NumElts; i++)
11317     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11318 
11319   // Result type may be wider than element type.
11320   if (EltVT != Node->getValueType(0))
11321     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11322   return Res;
11323 }
11324 
11325 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11326   SDLoc dl(Node);
11327   SDValue AccOp = Node->getOperand(0);
11328   SDValue VecOp = Node->getOperand(1);
11329   SDNodeFlags Flags = Node->getFlags();
11330 
11331   EVT VT = VecOp.getValueType();
11332   EVT EltVT = VT.getVectorElementType();
11333 
11334   if (VT.isScalableVector())
11335     report_fatal_error(
11336         "Expanding reductions for scalable vectors is undefined.");
11337 
11338   unsigned NumElts = VT.getVectorNumElements();
11339 
11340   SmallVector<SDValue, 8> Ops;
11341   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11342 
11343   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11344 
11345   SDValue Res = AccOp;
11346   for (unsigned i = 0; i < NumElts; i++)
11347     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11348 
11349   return Res;
11350 }
11351 
11352 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11353                                SelectionDAG &DAG) const {
11354   EVT VT = Node->getValueType(0);
11355   SDLoc dl(Node);
11356   bool isSigned = Node->getOpcode() == ISD::SREM;
11357   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11358   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11359   SDValue Dividend = Node->getOperand(0);
11360   SDValue Divisor = Node->getOperand(1);
11361   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11362     SDVTList VTs = DAG.getVTList(VT, VT);
11363     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11364     return true;
11365   }
11366   if (isOperationLegalOrCustom(DivOpc, VT)) {
11367     // X % Y -> X-X/Y*Y
11368     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11369     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11370     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11371     return true;
11372   }
11373   return false;
11374 }
11375 
// Expand FP_TO_[SU]INT_SAT: convert Src to an integer, saturating to the
// [MinInt, MaxInt] range implied by the saturation width, and mapping NaN to
// zero for the signed case (unsigned NaN already maps to zero via MinInt).
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Round the integer bounds towards zero into the source FP format; if that
  // was inexact the bounds are not exactly representable and the min/max
  // clamp sequence below would be wrong.
  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    // SETUO(x, x) is true exactly when x is NaN.
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
11485 
// Round Op from OperandVT to ResultVT using round-to-odd: when the narrowing
// is inexact and the narrowed value is even, nudge it to the adjacent odd
// value. This makes a subsequent rounding step (e.g. f32 -> bf16) immune to
// double-rounding errors. Returns the rounded value bitcast back to ResultVT.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  // Nothing to do when source and destination share a scalar type.
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT WideIntVT = OperandVT.changeTypeToInteger();
  SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
  // Save the sign bit; the rest of the algorithm works on |Op| and the sign
  // is re-attached at the end.
  SDValue SignBit =
      DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
                  DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
  SDValue AbsWide;
  if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
    AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  } else {
    // No legal FABS: clear the sign bit with integer masking instead.
    SDValue ClearedSign = DAG.getNode(
        ISD::AND, dl, WideIntVT, OpAsInt,
        DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
    AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
  }
  // Round |Op| to the narrow type, then widen it back to compare against the
  // original and detect whether rounding was exact and in which direction.
  SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
  SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       AbsWide.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true for both "equal" (exact rounding) and "unordered" (NaN).
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  // Shift the saved sign bit down into the narrow type and OR it back in.
  int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
  SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
  SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
  SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
  Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
11553 
// Expand FP_ROUND for targets without native support. Currently only handles
// rounding to bf16; returns an empty SDValue for any other destination type
// so the caller can fall back to other lowering strategies.
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // Operand 1 == 1 means the truncation is known not to change the value
    // (no double rounding concern); use the direct FP_TO_BF16 node.
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    // SETUO(x, x) is true exactly when x is NaN; computed up front on the
    // original operand before it is rewritten below.
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
    // Round-to-nearest-even: bias is 0x7fff plus the lsb of the kept bits.
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  return SDValue();
}
11607 
// Expand VECTOR_SPLICE for scalable vectors by spilling CONCAT(V1, V2) to a
// stack slot and loading the spliced result back at the right byte offset.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot holds both inputs: an element-count of 2 * VT.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // sizeof(V1) is scalable, so the offset is expressed via VSCALE.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  // Negative Imm: splice the last -Imm elements of V1 with the front of V2.
  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  if (TrailingElts > VT.getVectorMinNumElements()) {
    // Clamp dynamically against the scalable store size of V1 (in bytes).
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
11681 
11682 SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11683                                               SelectionDAG &DAG) const {
11684   SDLoc DL(Node);
11685   SDValue Vec = Node->getOperand(0);
11686   SDValue Mask = Node->getOperand(1);
11687   SDValue Passthru = Node->getOperand(2);
11688 
11689   EVT VecVT = Vec.getValueType();
11690   EVT ScalarVT = VecVT.getScalarType();
11691   EVT MaskVT = Mask.getValueType();
11692   EVT MaskScalarVT = MaskVT.getScalarType();
11693 
11694   // Needs to be handled by targets that have scalable vector types.
11695   if (VecVT.isScalableVector())
11696     report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11697 
11698   SDValue StackPtr = DAG.CreateStackTemporary(
11699       VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11700   int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11701   MachinePointerInfo PtrInfo =
11702       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11703 
11704   MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11705   SDValue Chain = DAG.getEntryNode();
11706   SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11707 
11708   bool HasPassthru = !Passthru.isUndef();
11709 
11710   // If we have a passthru vector, store it on the stack, overwrite the matching
11711   // positions and then re-write the last element that was potentially
11712   // overwritten even though mask[i] = false.
11713   if (HasPassthru)
11714     Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11715 
11716   SDValue LastWriteVal;
11717   APInt PassthruSplatVal;
11718   bool IsSplatPassthru =
11719       ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11720 
11721   if (IsSplatPassthru) {
11722     // As we do not know which position we wrote to last, we cannot simply
11723     // access that index from the passthru vector. So we first check if passthru
11724     // is a splat vector, to use any element ...
11725     LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11726   } else if (HasPassthru) {
11727     // ... if it is not a splat vector, we need to get the passthru value at
11728     // position = popcount(mask) and re-load it from the stack before it is
11729     // overwritten in the loop below.
11730     EVT PopcountVT = ScalarVT.changeTypeToInteger();
11731     SDValue Popcount = DAG.getNode(
11732         ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11733     Popcount =
11734         DAG.getNode(ISD::ZERO_EXTEND, DL,
11735                     MaskVT.changeVectorElementType(PopcountVT), Popcount);
11736     Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11737     SDValue LastElmtPtr =
11738         getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11739     LastWriteVal = DAG.getLoad(
11740         ScalarVT, DL, Chain, LastElmtPtr,
11741         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11742     Chain = LastWriteVal.getValue(1);
11743   }
11744 
11745   unsigned NumElms = VecVT.getVectorNumElements();
11746   for (unsigned I = 0; I < NumElms; I++) {
11747     SDValue Idx = DAG.getVectorIdxConstant(I, DL);
11748 
11749     SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11750     SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11751     Chain = DAG.getStore(
11752         Chain, DL, ValI, OutPtr,
11753         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11754 
11755     // Get the mask value and add it to the current output position. This
11756     // either increments by 1 if MaskI is true or adds 0 otherwise.
11757     // Freeze in case we have poison/undef mask entries.
11758     SDValue MaskI = DAG.getFreeze(
11759         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11760     MaskI = DAG.getFreeze(MaskI);
11761     MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11762     MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11763     OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11764 
11765     if (HasPassthru && I == NumElms - 1) {
11766       SDValue EndOfVector =
11767           DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11768       SDValue AllLanesSelected =
11769           DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11770       OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11771       OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11772 
11773       // Re-write the last ValI if all lanes were selected. Otherwise,
11774       // overwrite the last write it with the passthru value.
11775       LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11776                                    LastWriteVal, SDNodeFlags::Unpredictable);
11777       Chain = DAG.getStore(
11778           Chain, DL, LastWriteVal, OutPtr,
11779           MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11780     }
11781   }
11782 
11783   return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11784 }
11785 
// Legalize the condition code of a SETCC / VP_SETCC (or the condition of a
// branch/select) whose CC is not legal for OpVT. On success the operands
// LHS/RHS/CC are rewritten in place (CC may become null when the comparison
// was fully expanded into LHS) and NeedInvert tells the caller to invert the
// final result. Returns true if any rewrite happened, false if the CC was
// already legal. Mask/EVL are set for VP nodes only; Chain is set for
// strict-FP comparisons.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try the cheap rewrites: swap operands, invert the condition, or
    // both, if any of those produces a legal/custom condition code.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Otherwise, expand into two comparisons (CC1, CC2) combined with Opc
    // (AND or OR).
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
                                          isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // Strict-FP comparisons produce chains; merge them for the caller.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    // The full comparison now lives in LHS; clear RHS/CC to signal this.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
11937 
11938 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
11939                                                       SelectionDAG &DAG) const {
11940   EVT VT = Node->getValueType(0);
11941   // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
11942   // split into two equal parts.
11943   if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
11944     return SDValue();
11945 
11946   // Restrict expansion to cases where both parts can be concatenated.
11947   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
11948   if (LoVT != HiVT || !isTypeLegal(LoVT))
11949     return SDValue();
11950 
11951   SDLoc DL(Node);
11952   unsigned Opcode = Node->getOpcode();
11953 
11954   // Don't expand if the result is likely to be unrolled anyway.
11955   if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
11956     return SDValue();
11957 
11958   SmallVector<SDValue, 4> LoOps, HiOps;
11959   for (const SDValue &V : Node->op_values()) {
11960     auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
11961     LoOps.push_back(Lo);
11962     HiOps.push_back(Hi);
11963   }
11964 
11965   SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
11966   SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
11967   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
11968 }
11969