xref: /llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (revision e2449f1bceeefd4a603cae024a7a1db763df6834)
1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Analysis/VectorUtils.h"
16 #include "llvm/CodeGen/CallingConvLower.h"
17 #include "llvm/CodeGen/CodeGenCommonISel.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineJumpTableInfo.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
23 #include "llvm/CodeGen/TargetRegisterInfo.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/Support/DivisionByConstantInfo.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
33 #include "llvm/Support/MathExtras.h"
34 #include "llvm/Target/TargetMachine.h"
35 #include <cctype>
36 using namespace llvm;
37 
/// NOTE: The TargetMachine owns TLOF.
/// Constructor simply forwards to TargetLoweringBase; all shared lowering
/// state lives in the base-class subobject.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
41 
/// Return a human-readable name for the given target-specific ISD opcode.
/// The base implementation knows no target nodes and always returns nullptr;
/// targets override this for debug printing of their custom nodes.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
45 
46 bool TargetLowering::isPositionIndependent() const {
47   return getTargetMachine().isPositionIndependent();
48 }
49 
50 /// Check whether a given call node is in tail position within its function. If
51 /// so, it sets Chain to the input chain of the tail call.
52 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
53                                           SDValue &Chain) const {
54   const Function &F = DAG.getMachineFunction().getFunction();
55 
56   // First, check if tail calls have been disabled in this function.
57   if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
58     return false;
59 
60   // Conservatively require the attributes of the call to match those of
61   // the return. Ignore following attributes because they don't affect the
62   // call sequence.
63   AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
64   for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
65                            Attribute::DereferenceableOrNull, Attribute::NoAlias,
66                            Attribute::NonNull, Attribute::NoUndef,
67                            Attribute::Range, Attribute::NoFPClass})
68     CallerAttrs.removeAttribute(Attr);
69 
70   if (CallerAttrs.hasAttributes())
71     return false;
72 
73   // It's not safe to eliminate the sign / zero extension of the return value.
74   if (CallerAttrs.contains(Attribute::ZExt) ||
75       CallerAttrs.contains(Attribute::SExt))
76     return false;
77 
78   // Check if the only use is a function return node.
79   return isUsedByReturnOnly(Node, Chain);
80 }
81 
82 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
83     const uint32_t *CallerPreservedMask,
84     const SmallVectorImpl<CCValAssign> &ArgLocs,
85     const SmallVectorImpl<SDValue> &OutVals) const {
86   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
87     const CCValAssign &ArgLoc = ArgLocs[I];
88     if (!ArgLoc.isRegLoc())
89       continue;
90     MCRegister Reg = ArgLoc.getLocReg();
91     // Only look at callee saved registers.
92     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
93       continue;
94     // Check that we pass the value used for the caller.
95     // (We look for a CopyFromReg reading a virtual register that is used
96     //  for the function live-in value of register Reg)
97     SDValue Value = OutVals[I];
98     if (Value->getOpcode() == ISD::AssertZext)
99       Value = Value.getOperand(0);
100     if (Value->getOpcode() != ISD::CopyFromReg)
101       return false;
102     Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
103     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
104       return false;
105   }
106   return true;
107 }
108 
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror every parameter attribute that affects call lowering into the
  // corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // The pointee-carrying ABI attributes are mutually exclusive; at most one
  // determines IndirectType below.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // An explicit stack alignment takes precedence over the byval alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
142 
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
///
/// Returns the pair {result value, output chain} produced by LowerCallTo.
/// If InChain is null, the call is chained to the DAG entry node.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  // Build one ArgListEntry per operand, deciding sign/zero extension from the
  // target hook and the signedness recorded in CallOptions.
  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    // When softening FP, consult the pre-soften type to decide whether any
    // extension is appropriate at all.
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  // The same extension logic applies to the return value.
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
198 
/// Choose the sequence of value types used to lower a memory operation of
/// Op.size() bytes, appending the chosen types to MemOps. Returns false when
/// more than Limit operations would be required, or when a fixed-alignment
/// memcpy has a source less aligned than its destination (with a finite
/// Limit).
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type first; MVT::Other means "no
  // preference" and triggers the generic selection below.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily emit operations of the current type, shrinking the type whenever
  // it no longer fits in the remaining byte count.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the integer types until a safe one is found; i8 is the
        // unconditional floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
288 
289 /// Soften the operands of a comparison. This code is shared among BR_CC,
290 /// SELECT_CC, and SETCC handlers.
291 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
292                                          SDValue &NewLHS, SDValue &NewRHS,
293                                          ISD::CondCode &CCCode,
294                                          const SDLoc &dl, const SDValue OldLHS,
295                                          const SDValue OldRHS) const {
296   SDValue Chain;
297   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
298                              OldRHS, Chain);
299 }
300 
/// Soften a floating-point comparison by expanding it into one or two soft-fp
/// comparison libcalls. On return, NewLHS/NewRHS/CCCode describe an integer
/// setcc equivalent to the original FP comparison; Chain (if non-null on
/// entry) is updated with the libcall chain(s).
///
/// NOTE(review): the IsSignaling parameter is not consulted anywhere in this
/// body — confirm whether signaling semantics are handled by callers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  // LC2 is only set for predicates that need two calls (e.g. SETUEQ = UO ||
  // OEQ); ShouldInvertCC records that the libcall computes the inverse of the
  // requested predicate.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  // First libcall: the result is compared against zero with the libcall's
  // canonical condition code (possibly inverted).
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-libcall case: combine the two setcc results with AND (inverted
    // predicates) or OR, and join the chains with a TokenFactor.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
440 
441 /// Return the entry encoding for a jump table in the current function. The
442 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
443 unsigned TargetLowering::getJumpTableEncoding() const {
444   // In non-pic modes, just use the address of a block.
445   if (!isPositionIndependent())
446     return MachineJumpTableInfo::EK_BlockAddress;
447 
448   // In PIC mode, if the target supports a GPRel32 directive, use it.
449   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
450     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
451 
452   // Otherwise, use a label difference.
453   return MachineJumpTableInfo::EK_LabelDifference32;
454 }
455 
456 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
457                                                  SelectionDAG &DAG) const {
458   // If our PIC model is GP relative, use the global offset table as the base.
459   unsigned JTEncoding = getJumpTableEncoding();
460 
461   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
462       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
463     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
464 
465   return Table;
466 }
467 
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
476 
477 SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
478                                                SDValue Addr, int JTI,
479                                                SelectionDAG &DAG) const {
480   SDValue Chain = Value;
481   // Jump table debug info is only needed if CodeView is enabled.
482   if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
483     Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
484   }
485   return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
486 }
487 
488 bool
489 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
490   const TargetMachine &TM = getTargetMachine();
491   const GlobalValue *GV = GA->getGlobal();
492 
493   // If the address is not even local to this DSO we will have to load it from
494   // a got and then add the offset.
495   if (!TM.shouldAssumeDSOLocal(GV))
496     return false;
497 
498   // If the code is position independent we will have to add a base register.
499   if (isPositionIndependent())
500     return false;
501 
502   // Otherwise we can do it.
503   return true;
504 }
505 
506 //===----------------------------------------------------------------------===//
507 //  Optimization Methods
508 //===----------------------------------------------------------------------===//
509 
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only handle a constant, non-opaque RHS.
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // If the constant carries bits outside the demanded set, rebuild the node
    // with a constant restricted to the demanded bits, preserving node flags.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
559 
560 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
561                                             const APInt &DemandedBits,
562                                             TargetLoweringOpt &TLO) const {
563   EVT VT = Op.getValueType();
564   APInt DemandedElts = VT.isVector()
565                            ? APInt::getAllOnes(VT.getVectorNumElements())
566                            : APInt(1, 1);
567   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
568 }
569 
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      // Rebuild the op on truncated operands, then widen the result back.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
623 
624 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
625                                           DAGCombinerInfo &DCI) const {
626   SelectionDAG &DAG = DCI.DAG;
627   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
628                         !DCI.isBeforeLegalizeOps());
629   KnownBits Known;
630 
631   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
632   if (Simplified) {
633     DCI.AddToWorklist(Op.getNode());
634     DCI.CommitTargetLoweringOpt(TLO);
635   }
636   return Simplified;
637 }
638 
639 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
640                                           const APInt &DemandedElts,
641                                           DAGCombinerInfo &DCI) const {
642   SelectionDAG &DAG = DCI.DAG;
643   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
644                         !DCI.isBeforeLegalizeOps());
645   KnownBits Known;
646 
647   bool Simplified =
648       SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
649   if (Simplified) {
650     DCI.AddToWorklist(Op.getNode());
651     DCI.CommitTargetLoweringOpt(TLO);
652   }
653   return Simplified;
654 }
655 
656 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
657                                           KnownBits &Known,
658                                           TargetLoweringOpt &TLO,
659                                           unsigned Depth,
660                                           bool AssumeSingleUse) const {
661   EVT VT = Op.getValueType();
662 
663   // Since the number of lanes in a scalable vector is unknown at compile time,
664   // we track one bit which is implicitly broadcast to all lanes.  This means
665   // that all lanes in a scalable vector are considered demanded.
666   APInt DemandedElts = VT.isFixedLengthVector()
667                            ? APInt::getAllOnes(VT.getVectorNumElements())
668                            : APInt(1, 1);
669   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
670                               AssumeSingleUse);
671 }
672 
673 // TODO: Under what circumstances can we create nodes? Constant folding?
674 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
675     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
676     SelectionDAG &DAG, unsigned Depth) const {
677   EVT VT = Op.getValueType();
678 
679   // Limit search depth.
680   if (Depth >= SelectionDAG::MaxRecursionDepth)
681     return SDValue();
682 
683   // Ignore UNDEFs.
684   if (Op.isUndef())
685     return SDValue();
686 
687   // Not demanding any bits/elts from Op.
688   if (DemandedBits == 0 || DemandedElts == 0)
689     return DAG.getUNDEF(VT);
690 
691   bool IsLE = DAG.getDataLayout().isLittleEndian();
692   unsigned NumElts = DemandedElts.getBitWidth();
693   unsigned BitWidth = DemandedBits.getBitWidth();
694   KnownBits LHSKnown, RHSKnown;
695   switch (Op.getOpcode()) {
696   case ISD::BITCAST: {
697     if (VT.isScalableVector())
698       return SDValue();
699 
700     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
701     EVT SrcVT = Src.getValueType();
702     EVT DstVT = Op.getValueType();
703     if (SrcVT == DstVT)
704       return Src;
705 
706     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
707     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
708     if (NumSrcEltBits == NumDstEltBits)
709       if (SDValue V = SimplifyMultipleUseDemandedBits(
710               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
711         return DAG.getBitcast(DstVT, V);
712 
713     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
714       unsigned Scale = NumDstEltBits / NumSrcEltBits;
715       unsigned NumSrcElts = SrcVT.getVectorNumElements();
716       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
717       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
718       for (unsigned i = 0; i != Scale; ++i) {
719         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
720         unsigned BitOffset = EltOffset * NumSrcEltBits;
721         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
722         if (!Sub.isZero()) {
723           DemandedSrcBits |= Sub;
724           for (unsigned j = 0; j != NumElts; ++j)
725             if (DemandedElts[j])
726               DemandedSrcElts.setBit((j * Scale) + i);
727         }
728       }
729 
730       if (SDValue V = SimplifyMultipleUseDemandedBits(
731               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
732         return DAG.getBitcast(DstVT, V);
733     }
734 
735     // TODO - bigendian once we have test coverage.
736     if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
737       unsigned Scale = NumSrcEltBits / NumDstEltBits;
738       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
739       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
740       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
741       for (unsigned i = 0; i != NumElts; ++i)
742         if (DemandedElts[i]) {
743           unsigned Offset = (i % Scale) * NumDstEltBits;
744           DemandedSrcBits.insertBits(DemandedBits, Offset);
745           DemandedSrcElts.setBit(i / Scale);
746         }
747 
748       if (SDValue V = SimplifyMultipleUseDemandedBits(
749               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
750         return DAG.getBitcast(DstVT, V);
751     }
752 
753     break;
754   }
755   case ISD::FREEZE: {
756     SDValue N0 = Op.getOperand(0);
757     if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
758                                              /*PoisonOnly=*/false))
759       return N0;
760     break;
761   }
762   case ISD::AND: {
763     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
764     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
765 
766     // If all of the demanded bits are known 1 on one side, return the other.
767     // These bits cannot contribute to the result of the 'and' in this
768     // context.
769     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
770       return Op.getOperand(0);
771     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
772       return Op.getOperand(1);
773     break;
774   }
775   case ISD::OR: {
776     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
777     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
778 
779     // If all of the demanded bits are known zero on one side, return the
780     // other.  These bits cannot contribute to the result of the 'or' in this
781     // context.
782     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
783       return Op.getOperand(0);
784     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
785       return Op.getOperand(1);
786     break;
787   }
788   case ISD::XOR: {
789     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
790     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
791 
792     // If all of the demanded bits are known zero on one side, return the
793     // other.
794     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
795       return Op.getOperand(0);
796     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
797       return Op.getOperand(1);
798     break;
799   }
800   case ISD::ADD: {
801     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
802     if (RHSKnown.isZero())
803       return Op.getOperand(0);
804 
805     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
806     if (LHSKnown.isZero())
807       return Op.getOperand(1);
808     break;
809   }
810   case ISD::SHL: {
811     // If we are only demanding sign bits then we can use the shift source
812     // directly.
813     if (std::optional<uint64_t> MaxSA =
814             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
815       SDValue Op0 = Op.getOperand(0);
816       unsigned ShAmt = *MaxSA;
817       unsigned NumSignBits =
818           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
819       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
820       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
821         return Op0;
822     }
823     break;
824   }
825   case ISD::SRL: {
826     // If we are only demanding sign bits then we can use the shift source
827     // directly.
828     if (std::optional<uint64_t> MaxSA =
829             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
830       SDValue Op0 = Op.getOperand(0);
831       unsigned ShAmt = *MaxSA;
832       // Must already be signbits in DemandedBits bounds, and can't demand any
833       // shifted in zeroes.
834       if (DemandedBits.countl_zero() >= ShAmt) {
835         unsigned NumSignBits =
836             DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
837         if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
838           return Op0;
839       }
840     }
841     break;
842   }
843   case ISD::SETCC: {
844     SDValue Op0 = Op.getOperand(0);
845     SDValue Op1 = Op.getOperand(1);
846     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
847     // If (1) we only need the sign-bit, (2) the setcc operands are the same
848     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
849     // -1, we may be able to bypass the setcc.
850     if (DemandedBits.isSignMask() &&
851         Op0.getScalarValueSizeInBits() == BitWidth &&
852         getBooleanContents(Op0.getValueType()) ==
853             BooleanContent::ZeroOrNegativeOneBooleanContent) {
854       // If we're testing X < 0, then this compare isn't needed - just use X!
855       // FIXME: We're limiting to integer types here, but this should also work
856       // if we don't care about FP signed-zero. The use of SETLT with FP means
857       // that we don't care about NaNs.
858       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
859           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
860         return Op0;
861     }
862     break;
863   }
864   case ISD::SIGN_EXTEND_INREG: {
865     // If none of the extended bits are demanded, eliminate the sextinreg.
866     SDValue Op0 = Op.getOperand(0);
867     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
868     unsigned ExBits = ExVT.getScalarSizeInBits();
869     if (DemandedBits.getActiveBits() <= ExBits &&
870         shouldRemoveRedundantExtend(Op))
871       return Op0;
872     // If the input is already sign extended, just drop the extension.
873     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
874     if (NumSignBits >= (BitWidth - ExBits + 1))
875       return Op0;
876     break;
877   }
878   case ISD::ANY_EXTEND_VECTOR_INREG:
879   case ISD::SIGN_EXTEND_VECTOR_INREG:
880   case ISD::ZERO_EXTEND_VECTOR_INREG: {
881     if (VT.isScalableVector())
882       return SDValue();
883 
884     // If we only want the lowest element and none of extended bits, then we can
885     // return the bitcasted source vector.
886     SDValue Src = Op.getOperand(0);
887     EVT SrcVT = Src.getValueType();
888     EVT DstVT = Op.getValueType();
889     if (IsLE && DemandedElts == 1 &&
890         DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
891         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
892       return DAG.getBitcast(DstVT, Src);
893     }
894     break;
895   }
896   case ISD::INSERT_VECTOR_ELT: {
897     if (VT.isScalableVector())
898       return SDValue();
899 
900     // If we don't demand the inserted element, return the base vector.
901     SDValue Vec = Op.getOperand(0);
902     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
903     EVT VecVT = Vec.getValueType();
904     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
905         !DemandedElts[CIdx->getZExtValue()])
906       return Vec;
907     break;
908   }
909   case ISD::INSERT_SUBVECTOR: {
910     if (VT.isScalableVector())
911       return SDValue();
912 
913     SDValue Vec = Op.getOperand(0);
914     SDValue Sub = Op.getOperand(1);
915     uint64_t Idx = Op.getConstantOperandVal(2);
916     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
917     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
918     // If we don't demand the inserted subvector, return the base vector.
919     if (DemandedSubElts == 0)
920       return Vec;
921     break;
922   }
923   case ISD::VECTOR_SHUFFLE: {
924     assert(!VT.isScalableVector());
925     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
926 
927     // If all the demanded elts are from one operand and are inline,
928     // then we can use the operand directly.
929     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
930     for (unsigned i = 0; i != NumElts; ++i) {
931       int M = ShuffleMask[i];
932       if (M < 0 || !DemandedElts[i])
933         continue;
934       AllUndef = false;
935       IdentityLHS &= (M == (int)i);
936       IdentityRHS &= ((M - NumElts) == i);
937     }
938 
939     if (AllUndef)
940       return DAG.getUNDEF(Op.getValueType());
941     if (IdentityLHS)
942       return Op.getOperand(0);
943     if (IdentityRHS)
944       return Op.getOperand(1);
945     break;
946   }
947   default:
948     // TODO: Probably okay to remove after audit; here to reduce change size
949     // in initial enablement patch for scalable vectors
950     if (VT.isScalableVector())
951       return SDValue();
952 
953     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
954       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
955               Op, DemandedBits, DemandedElts, DAG, Depth))
956         return V;
957     break;
958   }
959   return SDValue();
960 }
961 
962 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
963     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
964     unsigned Depth) const {
965   EVT VT = Op.getValueType();
966   // Since the number of lanes in a scalable vector is unknown at compile time,
967   // we track one bit which is implicitly broadcast to all lanes.  This means
968   // that all lanes in a scalable vector are considered demanded.
969   APInt DemandedElts = VT.isFixedLengthVector()
970                            ? APInt::getAllOnes(VT.getVectorNumElements())
971                            : APInt(1, 1);
972   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
973                                          Depth);
974 }
975 
976 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
977     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
978     unsigned Depth) const {
979   APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
980   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
981                                          Depth);
982 }
983 
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// \p Op must be an SRL or SRA by a constant splat of 1 (asserted below). On
// success, returns an AVGFLOOR*/AVGCEIL* node built at the narrowest legal
// power-of-2 type that still holds the operands' value bits, extended or
// truncated back to Op's type; returns SDValue() if the pattern or the
// sign/zero-bit requirements don't hold.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  // Add2 records the inner add of a matched avgceil pattern so its
  // overflow flags can be checked later; it stays null for avgfloor.
  SDValue Add2;
  // Try to match (Op1 + Op2 + Op3) where one of Op2/Op3 is the +1 of an
  // avgceil; on success rebinds ExtOpA/ExtOpB to the two value operands and
  // remembers the inner add node A in Add2.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Check both nestings of the inner add: add(add(x, y), z) and
  // add(x, add(y, z)).
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  // Number of known redundant high bits shared by both operands; used below
  // to size the narrow type the AVG node is built at.
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // -1: one sign bit is consumed as headroom for the add's possible carry.
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Decide between the signed and unsigned AVG form, preferring whichever
  // gives more known high bits (and hence a narrower type below).
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // Signed form is only safe for srl if the sign bit isn't demanded.
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    // Without the narrow type, the fold is only sound if neither add (outer,
    // and inner Add2 when present) can overflow at the original width.
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create a AVGFLOOR node with a scalar constant unless its legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  // Build the AVG at the narrow type and extend/truncate back to VT,
  // using sign or zero extension to match the chosen AVG flavor.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1125 
1126 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1127 /// result of Op are ever used downstream. If we can use this information to
1128 /// simplify Op, create a new simplified DAG node and return true, returning the
1129 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1130 /// return a mask of Known bits for the expression (used to simplify the
1131 /// caller).  The Known bits may only be accurate for those bits in the
1132 /// OriginalDemandedBits and OriginalDemandedElts.
1133 bool TargetLowering::SimplifyDemandedBits(
1134     SDValue Op, const APInt &OriginalDemandedBits,
1135     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1136     unsigned Depth, bool AssumeSingleUse) const {
1137   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1138   assert(Op.getScalarValueSizeInBits() == BitWidth &&
1139          "Mask size mismatches value type size!");
1140 
1141   // Don't know anything.
1142   Known = KnownBits(BitWidth);
1143 
1144   EVT VT = Op.getValueType();
1145   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1146   unsigned NumElts = OriginalDemandedElts.getBitWidth();
1147   assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1148          "Unexpected vector size");
1149 
1150   APInt DemandedBits = OriginalDemandedBits;
1151   APInt DemandedElts = OriginalDemandedElts;
1152   SDLoc dl(Op);
1153 
1154   // Undef operand.
1155   if (Op.isUndef())
1156     return false;
1157 
1158   // We can't simplify target constants.
1159   if (Op.getOpcode() == ISD::TargetConstant)
1160     return false;
1161 
1162   if (Op.getOpcode() == ISD::Constant) {
1163     // We know all of the bits for a constant!
1164     Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1165     return false;
1166   }
1167 
1168   if (Op.getOpcode() == ISD::ConstantFP) {
1169     // We know all of the bits for a floating point constant!
1170     Known = KnownBits::makeConstant(
1171         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1172     return false;
1173   }
1174 
1175   // Other users may use these bits.
1176   bool HasMultiUse = false;
1177   if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1178     if (Depth >= SelectionDAG::MaxRecursionDepth) {
1179       // Limit search depth.
1180       return false;
1181     }
1182     // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1183     DemandedBits = APInt::getAllOnes(BitWidth);
1184     DemandedElts = APInt::getAllOnes(NumElts);
1185     HasMultiUse = true;
1186   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1187     // Not demanding any bits/elts from Op.
1188     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1189   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1190     // Limit search depth.
1191     return false;
1192   }
1193 
1194   KnownBits Known2;
1195   switch (Op.getOpcode()) {
1196   case ISD::SCALAR_TO_VECTOR: {
1197     if (VT.isScalableVector())
1198       return false;
1199     if (!DemandedElts[0])
1200       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1201 
1202     KnownBits SrcKnown;
1203     SDValue Src = Op.getOperand(0);
1204     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1205     APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1206     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1207       return true;
1208 
1209     // Upper elements are undef, so only get the knownbits if we just demand
1210     // the bottom element.
1211     if (DemandedElts == 1)
1212       Known = SrcKnown.anyextOrTrunc(BitWidth);
1213     break;
1214   }
1215   case ISD::BUILD_VECTOR:
1216     // Collect the known bits that are shared by every demanded element.
1217     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1218     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1219     return false; // Don't fall through, will infinitely loop.
1220   case ISD::SPLAT_VECTOR: {
1221     SDValue Scl = Op.getOperand(0);
1222     APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1223     KnownBits KnownScl;
1224     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1225       return true;
1226 
1227     // Implicitly truncate the bits to match the official semantics of
1228     // SPLAT_VECTOR.
1229     Known = KnownScl.trunc(BitWidth);
1230     break;
1231   }
1232   case ISD::LOAD: {
1233     auto *LD = cast<LoadSDNode>(Op);
1234     if (getTargetConstantFromLoad(LD)) {
1235       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1236       return false; // Don't fall through, will infinitely loop.
1237     }
1238     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1239       // If this is a ZEXTLoad and we are looking at the loaded value.
1240       EVT MemVT = LD->getMemoryVT();
1241       unsigned MemBits = MemVT.getScalarSizeInBits();
1242       Known.Zero.setBitsFrom(MemBits);
1243       return false; // Don't fall through, will infinitely loop.
1244     }
1245     break;
1246   }
1247   case ISD::INSERT_VECTOR_ELT: {
1248     if (VT.isScalableVector())
1249       return false;
1250     SDValue Vec = Op.getOperand(0);
1251     SDValue Scl = Op.getOperand(1);
1252     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1253     EVT VecVT = Vec.getValueType();
1254 
1255     // If index isn't constant, assume we need all vector elements AND the
1256     // inserted element.
1257     APInt DemandedVecElts(DemandedElts);
1258     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1259       unsigned Idx = CIdx->getZExtValue();
1260       DemandedVecElts.clearBit(Idx);
1261 
1262       // Inserted element is not required.
1263       if (!DemandedElts[Idx])
1264         return TLO.CombineTo(Op, Vec);
1265     }
1266 
1267     KnownBits KnownScl;
1268     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1269     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1270     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1271       return true;
1272 
1273     Known = KnownScl.anyextOrTrunc(BitWidth);
1274 
1275     KnownBits KnownVec;
1276     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1277                              Depth + 1))
1278       return true;
1279 
1280     if (!!DemandedVecElts)
1281       Known = Known.intersectWith(KnownVec);
1282 
1283     return false;
1284   }
1285   case ISD::INSERT_SUBVECTOR: {
1286     if (VT.isScalableVector())
1287       return false;
1288     // Demand any elements from the subvector and the remainder from the src its
1289     // inserted into.
1290     SDValue Src = Op.getOperand(0);
1291     SDValue Sub = Op.getOperand(1);
1292     uint64_t Idx = Op.getConstantOperandVal(2);
1293     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1294     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1295     APInt DemandedSrcElts = DemandedElts;
1296     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1297 
1298     KnownBits KnownSub, KnownSrc;
1299     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1300                              Depth + 1))
1301       return true;
1302     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1303                              Depth + 1))
1304       return true;
1305 
1306     Known.Zero.setAllBits();
1307     Known.One.setAllBits();
1308     if (!!DemandedSubElts)
1309       Known = Known.intersectWith(KnownSub);
1310     if (!!DemandedSrcElts)
1311       Known = Known.intersectWith(KnownSrc);
1312 
1313     // Attempt to avoid multi-use src if we don't need anything from it.
1314     if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1315         !DemandedSrcElts.isAllOnes()) {
1316       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1317           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1318       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1319           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1320       if (NewSub || NewSrc) {
1321         NewSub = NewSub ? NewSub : Sub;
1322         NewSrc = NewSrc ? NewSrc : Src;
1323         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1324                                         Op.getOperand(2));
1325         return TLO.CombineTo(Op, NewOp);
1326       }
1327     }
1328     break;
1329   }
1330   case ISD::EXTRACT_SUBVECTOR: {
1331     if (VT.isScalableVector())
1332       return false;
1333     // Offset the demanded elts by the subvector index.
1334     SDValue Src = Op.getOperand(0);
1335     if (Src.getValueType().isScalableVector())
1336       break;
1337     uint64_t Idx = Op.getConstantOperandVal(1);
1338     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1339     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1340 
1341     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1342                              Depth + 1))
1343       return true;
1344 
1345     // Attempt to avoid multi-use src if we don't need anything from it.
1346     if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1347       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1348           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1349       if (DemandedSrc) {
1350         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1351                                         Op.getOperand(1));
1352         return TLO.CombineTo(Op, NewOp);
1353       }
1354     }
1355     break;
1356   }
1357   case ISD::CONCAT_VECTORS: {
1358     if (VT.isScalableVector())
1359       return false;
1360     Known.Zero.setAllBits();
1361     Known.One.setAllBits();
1362     EVT SubVT = Op.getOperand(0).getValueType();
1363     unsigned NumSubVecs = Op.getNumOperands();
1364     unsigned NumSubElts = SubVT.getVectorNumElements();
1365     for (unsigned i = 0; i != NumSubVecs; ++i) {
1366       APInt DemandedSubElts =
1367           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1368       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1369                                Known2, TLO, Depth + 1))
1370         return true;
1371       // Known bits are shared by every demanded subvector element.
1372       if (!!DemandedSubElts)
1373         Known = Known.intersectWith(Known2);
1374     }
1375     break;
1376   }
1377   case ISD::VECTOR_SHUFFLE: {
1378     assert(!VT.isScalableVector());
1379     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1380 
1381     // Collect demanded elements from shuffle operands..
1382     APInt DemandedLHS, DemandedRHS;
1383     if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1384                                 DemandedRHS))
1385       break;
1386 
1387     if (!!DemandedLHS || !!DemandedRHS) {
1388       SDValue Op0 = Op.getOperand(0);
1389       SDValue Op1 = Op.getOperand(1);
1390 
1391       Known.Zero.setAllBits();
1392       Known.One.setAllBits();
1393       if (!!DemandedLHS) {
1394         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1395                                  Depth + 1))
1396           return true;
1397         Known = Known.intersectWith(Known2);
1398       }
1399       if (!!DemandedRHS) {
1400         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1401                                  Depth + 1))
1402           return true;
1403         Known = Known.intersectWith(Known2);
1404       }
1405 
1406       // Attempt to avoid multi-use ops if we don't need anything from them.
1407       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1408           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1409       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1410           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1411       if (DemandedOp0 || DemandedOp1) {
1412         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1413         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1414         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1415         return TLO.CombineTo(Op, NewOp);
1416       }
1417     }
1418     break;
1419   }
1420   case ISD::AND: {
1421     SDValue Op0 = Op.getOperand(0);
1422     SDValue Op1 = Op.getOperand(1);
1423 
1424     // If the RHS is a constant, check to see if the LHS would be zero without
1425     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1426     // simplify the LHS, here we're using information from the LHS to simplify
1427     // the RHS.
1428     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1429       // Do not increment Depth here; that can cause an infinite loop.
1430       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1431       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1432       if ((LHSKnown.Zero & DemandedBits) ==
1433           (~RHSC->getAPIntValue() & DemandedBits))
1434         return TLO.CombineTo(Op, Op0);
1435 
1436       // If any of the set bits in the RHS are known zero on the LHS, shrink
1437       // the constant.
1438       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1439                                  DemandedElts, TLO))
1440         return true;
1441 
1442       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1443       // constant, but if this 'and' is only clearing bits that were just set by
1444       // the xor, then this 'and' can be eliminated by shrinking the mask of
1445       // the xor. For example, for a 32-bit X:
1446       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1447       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1448           LHSKnown.One == ~RHSC->getAPIntValue()) {
1449         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1450         return TLO.CombineTo(Op, Xor);
1451       }
1452     }
1453 
1454     // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1455     // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1456     if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1457         (Op0.getOperand(0).isUndef() ||
1458          ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1459         Op0->hasOneUse()) {
1460       unsigned NumSubElts =
1461           Op0.getOperand(1).getValueType().getVectorNumElements();
1462       unsigned SubIdx = Op0.getConstantOperandVal(2);
1463       APInt DemandedSub =
1464           APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1465       KnownBits KnownSubMask =
1466           TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1467       if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1468         SDValue NewAnd =
1469             TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1470         SDValue NewInsert =
1471             TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1472                             Op0.getOperand(1), Op0.getOperand(2));
1473         return TLO.CombineTo(Op, NewInsert);
1474       }
1475     }
1476 
1477     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1478                              Depth + 1))
1479       return true;
1480     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1481                              Known2, TLO, Depth + 1))
1482       return true;
1483 
1484     // If all of the demanded bits are known one on one side, return the other.
1485     // These bits cannot contribute to the result of the 'and'.
1486     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1487       return TLO.CombineTo(Op, Op0);
1488     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1489       return TLO.CombineTo(Op, Op1);
1490     // If all of the demanded bits in the inputs are known zeros, return zero.
1491     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1492       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1493     // If the RHS is a constant, see if we can simplify it.
1494     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1495                                TLO))
1496       return true;
1497     // If the operation can be done in a smaller type, do so.
1498     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1499       return true;
1500 
1501     // Attempt to avoid multi-use ops if we don't need anything from them.
1502     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1503       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1504           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1505       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1506           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1507       if (DemandedOp0 || DemandedOp1) {
1508         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1509         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1510         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1511         return TLO.CombineTo(Op, NewOp);
1512       }
1513     }
1514 
1515     Known &= Known2;
1516     break;
1517   }
1518   case ISD::OR: {
1519     SDValue Op0 = Op.getOperand(0);
1520     SDValue Op1 = Op.getOperand(1);
1521     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1522                              Depth + 1)) {
1523       Op->dropFlags(SDNodeFlags::Disjoint);
1524       return true;
1525     }
1526 
1527     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1528                              Known2, TLO, Depth + 1)) {
1529       Op->dropFlags(SDNodeFlags::Disjoint);
1530       return true;
1531     }
1532 
1533     // If all of the demanded bits are known zero on one side, return the other.
1534     // These bits cannot contribute to the result of the 'or'.
1535     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1536       return TLO.CombineTo(Op, Op0);
1537     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1538       return TLO.CombineTo(Op, Op1);
1539     // If the RHS is a constant, see if we can simplify it.
1540     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1541       return true;
1542     // If the operation can be done in a smaller type, do so.
1543     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1544       return true;
1545 
1546     // Attempt to avoid multi-use ops if we don't need anything from them.
1547     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1548       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1549           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1550       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1551           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1552       if (DemandedOp0 || DemandedOp1) {
1553         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1554         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1555         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1556         return TLO.CombineTo(Op, NewOp);
1557       }
1558     }
1559 
1560     // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1561     // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1562     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1563         Op0->hasOneUse() && Op1->hasOneUse()) {
1564       // Attempt to match all commutations - m_c_Or would've been useful!
1565       for (int I = 0; I != 2; ++I) {
1566         SDValue X = Op.getOperand(I).getOperand(0);
1567         SDValue C1 = Op.getOperand(I).getOperand(1);
1568         SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1569         SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1570         if (Alt.getOpcode() == ISD::OR) {
1571           for (int J = 0; J != 2; ++J) {
1572             if (X == Alt.getOperand(J)) {
1573               SDValue Y = Alt.getOperand(1 - J);
1574               if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1575                                                                {C1, C2})) {
1576                 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1577                 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1578                 return TLO.CombineTo(
1579                     Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1580               }
1581             }
1582           }
1583         }
1584       }
1585     }
1586 
1587     Known |= Known2;
1588     break;
1589   }
1590   case ISD::XOR: {
1591     SDValue Op0 = Op.getOperand(0);
1592     SDValue Op1 = Op.getOperand(1);
1593 
1594     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1595                              Depth + 1))
1596       return true;
1597     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1598                              Depth + 1))
1599       return true;
1600 
1601     // If all of the demanded bits are known zero on one side, return the other.
1602     // These bits cannot contribute to the result of the 'xor'.
1603     if (DemandedBits.isSubsetOf(Known.Zero))
1604       return TLO.CombineTo(Op, Op0);
1605     if (DemandedBits.isSubsetOf(Known2.Zero))
1606       return TLO.CombineTo(Op, Op1);
1607     // If the operation can be done in a smaller type, do so.
1608     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1609       return true;
1610 
1611     // If all of the unknown bits are known to be zero on one side or the other
1612     // turn this into an *inclusive* or.
1613     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1614     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1615       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1616 
1617     ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1618     if (C) {
1619       // If one side is a constant, and all of the set bits in the constant are
1620       // also known set on the other side, turn this into an AND, as we know
1621       // the bits will be cleared.
1622       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1623       // NB: it is okay if more bits are known than are requested
1624       if (C->getAPIntValue() == Known2.One) {
1625         SDValue ANDC =
1626             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1627         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1628       }
1629 
1630       // If the RHS is a constant, see if we can change it. Don't alter a -1
1631       // constant because that's a 'not' op, and that is better for combining
1632       // and codegen.
1633       if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1634         // We're flipping all demanded bits. Flip the undemanded bits too.
1635         SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1636         return TLO.CombineTo(Op, New);
1637       }
1638 
1639       unsigned Op0Opcode = Op0.getOpcode();
1640       if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1641         if (ConstantSDNode *ShiftC =
1642                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1643           // Don't crash on an oversized shift. We can not guarantee that a
1644           // bogus shift has been simplified to undef.
1645           if (ShiftC->getAPIntValue().ult(BitWidth)) {
1646             uint64_t ShiftAmt = ShiftC->getZExtValue();
1647             APInt Ones = APInt::getAllOnes(BitWidth);
1648             Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1649                                          : Ones.lshr(ShiftAmt);
1650             if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1651                 isDesirableToCommuteXorWithShift(Op.getNode())) {
1652               // If the xor constant is a demanded mask, do a 'not' before the
1653               // shift:
1654               // xor (X << ShiftC), XorC --> (not X) << ShiftC
1655               // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1656               SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1657               return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1658                                                        Op0.getOperand(1)));
1659             }
1660           }
1661         }
1662       }
1663     }
1664 
1665     // If we can't turn this into a 'not', try to shrink the constant.
1666     if (!C || !C->isAllOnes())
1667       if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1668         return true;
1669 
1670     // Attempt to avoid multi-use ops if we don't need anything from them.
1671     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1672       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1673           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1674       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1675           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1676       if (DemandedOp0 || DemandedOp1) {
1677         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1678         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1679         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1680         return TLO.CombineTo(Op, NewOp);
1681       }
1682     }
1683 
1684     Known ^= Known2;
1685     break;
1686   }
1687   case ISD::SELECT:
1688     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1689                              Known, TLO, Depth + 1))
1690       return true;
1691     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1692                              Known2, TLO, Depth + 1))
1693       return true;
1694 
1695     // If the operands are constants, see if we can simplify them.
1696     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1697       return true;
1698 
1699     // Only known if known in both the LHS and RHS.
1700     Known = Known.intersectWith(Known2);
1701     break;
1702   case ISD::VSELECT:
1703     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1704                              Known, TLO, Depth + 1))
1705       return true;
1706     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1707                              Known2, TLO, Depth + 1))
1708       return true;
1709 
1710     // Only known if known in both the LHS and RHS.
1711     Known = Known.intersectWith(Known2);
1712     break;
1713   case ISD::SELECT_CC:
1714     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1715                              Known, TLO, Depth + 1))
1716       return true;
1717     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1718                              Known2, TLO, Depth + 1))
1719       return true;
1720 
1721     // If the operands are constants, see if we can simplify them.
1722     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1723       return true;
1724 
1725     // Only known if known in both the LHS and RHS.
1726     Known = Known.intersectWith(Known2);
1727     break;
1728   case ISD::SETCC: {
1729     SDValue Op0 = Op.getOperand(0);
1730     SDValue Op1 = Op.getOperand(1);
1731     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1732     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1733     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1734     // -1, we may be able to bypass the setcc.
1735     if (DemandedBits.isSignMask() &&
1736         Op0.getScalarValueSizeInBits() == BitWidth &&
1737         getBooleanContents(Op0.getValueType()) ==
1738             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1739       // If we're testing X < 0, then this compare isn't needed - just use X!
1740       // FIXME: We're limiting to integer types here, but this should also work
1741       // if we don't care about FP signed-zero. The use of SETLT with FP means
1742       // that we don't care about NaNs.
1743       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1744           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1745         return TLO.CombineTo(Op, Op0);
1746 
1747       // TODO: Should we check for other forms of sign-bit comparisons?
1748       // Examples: X <= -1, X >= 0
1749     }
1750     if (getBooleanContents(Op0.getValueType()) ==
1751             TargetLowering::ZeroOrOneBooleanContent &&
1752         BitWidth > 1)
1753       Known.Zero.setBitsFrom(1);
1754     break;
1755   }
1756   case ISD::SHL: {
1757     SDValue Op0 = Op.getOperand(0);
1758     SDValue Op1 = Op.getOperand(1);
1759     EVT ShiftVT = Op1.getValueType();
1760 
1761     if (std::optional<uint64_t> KnownSA =
1762             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1763       unsigned ShAmt = *KnownSA;
1764       if (ShAmt == 0)
1765         return TLO.CombineTo(Op, Op0);
1766 
1767       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1768       // single shift.  We can do this if the bottom bits (which are shifted
1769       // out) are never demanded.
1770       // TODO - support non-uniform vector amounts.
1771       if (Op0.getOpcode() == ISD::SRL) {
1772         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1773           if (std::optional<uint64_t> InnerSA =
1774                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1775             unsigned C1 = *InnerSA;
1776             unsigned Opc = ISD::SHL;
1777             int Diff = ShAmt - C1;
1778             if (Diff < 0) {
1779               Diff = -Diff;
1780               Opc = ISD::SRL;
1781             }
1782             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1783             return TLO.CombineTo(
1784                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1785           }
1786         }
1787       }
1788 
1789       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1790       // are not demanded. This will likely allow the anyext to be folded away.
1791       // TODO - support non-uniform vector amounts.
1792       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1793         SDValue InnerOp = Op0.getOperand(0);
1794         EVT InnerVT = InnerOp.getValueType();
1795         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1796         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1797             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1798           SDValue NarrowShl = TLO.DAG.getNode(
1799               ISD::SHL, dl, InnerVT, InnerOp,
1800               TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1801           return TLO.CombineTo(
1802               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1803         }
1804 
1805         // Repeat the SHL optimization above in cases where an extension
1806         // intervenes: (shl (anyext (shr x, c1)), c2) to
1807         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1808         // aren't demanded (as above) and that the shifted upper c1 bits of
1809         // x aren't demanded.
1810         // TODO - support non-uniform vector amounts.
1811         if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1812             InnerOp.hasOneUse()) {
1813           if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1814                   InnerOp, DemandedElts, Depth + 2)) {
1815             unsigned InnerShAmt = *SA2;
1816             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1817                 DemandedBits.getActiveBits() <=
1818                     (InnerBits - InnerShAmt + ShAmt) &&
1819                 DemandedBits.countr_zero() >= ShAmt) {
1820               SDValue NewSA =
1821                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1822               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1823                                                InnerOp.getOperand(0));
1824               return TLO.CombineTo(
1825                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1826             }
1827           }
1828         }
1829       }
1830 
1831       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1832       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1833                                Depth + 1)) {
1834         // Disable the nsw and nuw flags. We can no longer guarantee that we
1835         // won't wrap after simplification.
1836         Op->dropFlags(SDNodeFlags::NoWrap);
1837         return true;
1838       }
1839       Known.Zero <<= ShAmt;
1840       Known.One <<= ShAmt;
1841       // low bits known zero.
1842       Known.Zero.setLowBits(ShAmt);
1843 
1844       // Attempt to avoid multi-use ops if we don't need anything from them.
1845       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1846         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1847             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1848         if (DemandedOp0) {
1849           SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1850           return TLO.CombineTo(Op, NewOp);
1851         }
1852       }
1853 
1854       // TODO: Can we merge this fold with the one below?
1855       // Try shrinking the operation as long as the shift amount will still be
1856       // in range.
1857       if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1858           Op.getNode()->hasOneUse()) {
1859         // Search for the smallest integer type with free casts to and from
1860         // Op's type. For expedience, just check power-of-2 integer types.
1861         unsigned DemandedSize = DemandedBits.getActiveBits();
1862         for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1863              SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1864           EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1865           if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1866               isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1867               isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1868               (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1869             assert(DemandedSize <= SmallVTBits &&
1870                    "Narrowed below demanded bits?");
1871             // We found a type with free casts.
1872             SDValue NarrowShl = TLO.DAG.getNode(
1873                 ISD::SHL, dl, SmallVT,
1874                 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1875                 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1876             return TLO.CombineTo(
1877                 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1878           }
1879         }
1880       }
1881 
1882       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1883       // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1884       // Only do this if we demand the upper half so the knownbits are correct.
1885       unsigned HalfWidth = BitWidth / 2;
1886       if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1887           DemandedBits.countLeadingOnes() >= HalfWidth) {
1888         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1889         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1890             isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1891             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1892             (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1893           // If we're demanding the upper bits at all, we must ensure
1894           // that the upper bits of the shift result are known to be zero,
1895           // which is equivalent to the narrow shift being NUW.
1896           if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1897             bool IsNSW = Known.countMinSignBits() > HalfWidth;
1898             SDNodeFlags Flags;
1899             Flags.setNoSignedWrap(IsNSW);
1900             Flags.setNoUnsignedWrap(IsNUW);
1901             SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1902             SDValue NewShiftAmt =
1903                 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1904             SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1905                                                NewShiftAmt, Flags);
1906             SDValue NewExt =
1907                 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1908             return TLO.CombineTo(Op, NewExt);
1909           }
1910         }
1911       }
1912     } else {
1913       // This is a variable shift, so we can't shift the demand mask by a known
1914       // amount. But if we are not demanding high bits, then we are not
1915       // demanding those bits from the pre-shifted operand either.
1916       if (unsigned CTLZ = DemandedBits.countl_zero()) {
1917         APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1918         if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1919                                  Depth + 1)) {
1920           // Disable the nsw and nuw flags. We can no longer guarantee that we
1921           // won't wrap after simplification.
1922           Op->dropFlags(SDNodeFlags::NoWrap);
1923           return true;
1924         }
1925         Known.resetAll();
1926       }
1927     }
1928 
1929     // If we are only demanding sign bits then we can use the shift source
1930     // directly.
1931     if (std::optional<uint64_t> MaxSA =
1932             TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1933       unsigned ShAmt = *MaxSA;
1934       unsigned NumSignBits =
1935           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1936       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1937       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1938         return TLO.CombineTo(Op, Op0);
1939     }
1940     break;
1941   }
1942   case ISD::SRL: {
1943     SDValue Op0 = Op.getOperand(0);
1944     SDValue Op1 = Op.getOperand(1);
1945     EVT ShiftVT = Op1.getValueType();
1946 
1947     if (std::optional<uint64_t> KnownSA =
1948             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1949       unsigned ShAmt = *KnownSA;
1950       if (ShAmt == 0)
1951         return TLO.CombineTo(Op, Op0);
1952 
1953       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1954       // single shift.  We can do this if the top bits (which are shifted out)
1955       // are never demanded.
1956       // TODO - support non-uniform vector amounts.
1957       if (Op0.getOpcode() == ISD::SHL) {
1958         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1959           if (std::optional<uint64_t> InnerSA =
1960                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1961             unsigned C1 = *InnerSA;
1962             unsigned Opc = ISD::SRL;
1963             int Diff = ShAmt - C1;
1964             if (Diff < 0) {
1965               Diff = -Diff;
1966               Opc = ISD::SHL;
1967             }
1968             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1969             return TLO.CombineTo(
1970                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1971           }
1972         }
1973       }
1974 
1975       // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1976       // single sra. We can do this if the top bits are never demanded.
1977       if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1978         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1979           if (std::optional<uint64_t> InnerSA =
1980                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1981             unsigned C1 = *InnerSA;
1982             // Clamp the combined shift amount if it exceeds the bit width.
1983             unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1984             SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1985             return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1986                                                      Op0.getOperand(0), NewSA));
1987           }
1988         }
1989       }
1990 
1991       APInt InDemandedMask = (DemandedBits << ShAmt);
1992 
1993       // If the shift is exact, then it does demand the low bits (and knows that
1994       // they are zero).
1995       if (Op->getFlags().hasExact())
1996         InDemandedMask.setLowBits(ShAmt);
1997 
1998       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1999       // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2000       if ((BitWidth % 2) == 0 && !VT.isVector()) {
2001         APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2002         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2003         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2004             isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2005             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2006             (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2007             ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2008              TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2009           SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2010           SDValue NewShiftAmt =
2011               TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2012           SDValue NewShift =
2013               TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2014           return TLO.CombineTo(
2015               Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2016         }
2017       }
2018 
2019       // Compute the new bits that are at the top now.
2020       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2021                                Depth + 1))
2022         return true;
2023       Known.Zero.lshrInPlace(ShAmt);
2024       Known.One.lshrInPlace(ShAmt);
2025       // High bits known zero.
2026       Known.Zero.setHighBits(ShAmt);
2027 
2028       // Attempt to avoid multi-use ops if we don't need anything from them.
2029       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2030         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2031             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2032         if (DemandedOp0) {
2033           SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2034           return TLO.CombineTo(Op, NewOp);
2035         }
2036       }
2037     } else {
2038       // Use generic knownbits computation as it has support for non-uniform
2039       // shift amounts.
2040       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2041     }
2042 
2043     // If we are only demanding sign bits then we can use the shift source
2044     // directly.
2045     if (std::optional<uint64_t> MaxSA =
2046             TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2047       unsigned ShAmt = *MaxSA;
2048       // Must already be signbits in DemandedBits bounds, and can't demand any
2049       // shifted in zeroes.
2050       if (DemandedBits.countl_zero() >= ShAmt) {
2051         unsigned NumSignBits =
2052             TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2053         if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2054           return TLO.CombineTo(Op, Op0);
2055       }
2056     }
2057 
2058     // Try to match AVG patterns (after shift simplification).
2059     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2060                                         DemandedElts, Depth + 1))
2061       return TLO.CombineTo(Op, AVG);
2062 
2063     break;
2064   }
2065   case ISD::SRA: {
2066     SDValue Op0 = Op.getOperand(0);
2067     SDValue Op1 = Op.getOperand(1);
2068     EVT ShiftVT = Op1.getValueType();
2069 
2070     // If we only want bits that already match the signbit then we don't need
2071     // to shift.
2072     unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2073     if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2074         NumHiDemandedBits)
2075       return TLO.CombineTo(Op, Op0);
2076 
2077     // If this is an arithmetic shift right and only the low-bit is set, we can
2078     // always convert this into a logical shr, even if the shift amount is
2079     // variable.  The low bit of the shift cannot be an input sign bit unless
2080     // the shift amount is >= the size of the datatype, which is undefined.
2081     if (DemandedBits.isOne())
2082       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2083 
2084     if (std::optional<uint64_t> KnownSA =
2085             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2086       unsigned ShAmt = *KnownSA;
2087       if (ShAmt == 0)
2088         return TLO.CombineTo(Op, Op0);
2089 
2090       // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2091       // supports sext_inreg.
2092       if (Op0.getOpcode() == ISD::SHL) {
2093         if (std::optional<uint64_t> InnerSA =
2094                 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2095           unsigned LowBits = BitWidth - ShAmt;
2096           EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2097           if (VT.isVector())
2098             ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2099                                      VT.getVectorElementCount());
2100 
2101           if (*InnerSA == ShAmt) {
2102             if (!TLO.LegalOperations() ||
2103                 getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
2104               return TLO.CombineTo(
2105                   Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2106                                       Op0.getOperand(0),
2107                                       TLO.DAG.getValueType(ExtVT)));
2108 
2109             // Even if we can't convert to sext_inreg, we might be able to
2110             // remove this shift pair if the input is already sign extended.
2111             unsigned NumSignBits =
2112                 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2113             if (NumSignBits > ShAmt)
2114               return TLO.CombineTo(Op, Op0.getOperand(0));
2115           }
2116         }
2117       }
2118 
2119       APInt InDemandedMask = (DemandedBits << ShAmt);
2120 
2121       // If the shift is exact, then it does demand the low bits (and knows that
2122       // they are zero).
2123       if (Op->getFlags().hasExact())
2124         InDemandedMask.setLowBits(ShAmt);
2125 
2126       // If any of the demanded bits are produced by the sign extension, we also
2127       // demand the input sign bit.
2128       if (DemandedBits.countl_zero() < ShAmt)
2129         InDemandedMask.setSignBit();
2130 
2131       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2132                                Depth + 1))
2133         return true;
2134       Known.Zero.lshrInPlace(ShAmt);
2135       Known.One.lshrInPlace(ShAmt);
2136 
2137       // If the input sign bit is known to be zero, or if none of the top bits
2138       // are demanded, turn this into an unsigned shift right.
2139       if (Known.Zero[BitWidth - ShAmt - 1] ||
2140           DemandedBits.countl_zero() >= ShAmt) {
2141         SDNodeFlags Flags;
2142         Flags.setExact(Op->getFlags().hasExact());
2143         return TLO.CombineTo(
2144             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2145       }
2146 
2147       int Log2 = DemandedBits.exactLogBase2();
2148       if (Log2 >= 0) {
2149         // The bit must come from the sign.
2150         SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2151         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2152       }
2153 
2154       if (Known.One[BitWidth - ShAmt - 1])
2155         // New bits are known one.
2156         Known.One.setHighBits(ShAmt);
2157 
2158       // Attempt to avoid multi-use ops if we don't need anything from them.
2159       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2160         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2161             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2162         if (DemandedOp0) {
2163           SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2164           return TLO.CombineTo(Op, NewOp);
2165         }
2166       }
2167     }
2168 
2169     // Try to match AVG patterns (after shift simplification).
2170     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2171                                         DemandedElts, Depth + 1))
2172       return TLO.CombineTo(Op, AVG);
2173 
2174     break;
2175   }
2176   case ISD::FSHL:
2177   case ISD::FSHR: {
2178     SDValue Op0 = Op.getOperand(0);
2179     SDValue Op1 = Op.getOperand(1);
2180     SDValue Op2 = Op.getOperand(2);
2181     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2182 
2183     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2184       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2185 
2186       // For fshl, 0-shift returns the 1st arg.
2187       // For fshr, 0-shift returns the 2nd arg.
2188       if (Amt == 0) {
2189         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2190                                  Known, TLO, Depth + 1))
2191           return true;
2192         break;
2193       }
2194 
2195       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2196       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2197       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2198       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2199       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2200                                Depth + 1))
2201         return true;
2202       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2203                                Depth + 1))
2204         return true;
2205 
2206       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2207       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2208       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2209       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2210       Known = Known.unionWith(Known2);
2211 
2212       // Attempt to avoid multi-use ops if we don't need anything from them.
2213       if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2214           !DemandedElts.isAllOnes()) {
2215         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2216             Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2217         SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2218             Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2219         if (DemandedOp0 || DemandedOp1) {
2220           DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2221           DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2222           SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2223                                           DemandedOp1, Op2);
2224           return TLO.CombineTo(Op, NewOp);
2225         }
2226       }
2227     }
2228 
2229     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2230     if (isPowerOf2_32(BitWidth)) {
2231       APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2232       if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2233                                Known2, TLO, Depth + 1))
2234         return true;
2235     }
2236     break;
2237   }
2238   case ISD::ROTL:
2239   case ISD::ROTR: {
2240     SDValue Op0 = Op.getOperand(0);
2241     SDValue Op1 = Op.getOperand(1);
2242     bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2243 
2244     // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2245     if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2246       return TLO.CombineTo(Op, Op0);
2247 
2248     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2249       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2250       unsigned RevAmt = BitWidth - Amt;
2251 
2252       // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2253       // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2254       APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2255       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2256                                Depth + 1))
2257         return true;
2258 
2259       // rot*(x, 0) --> x
2260       if (Amt == 0)
2261         return TLO.CombineTo(Op, Op0);
2262 
2263       // See if we don't demand either half of the rotated bits.
2264       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2265           DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2266         Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2267         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2268       }
2269       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2270           DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2271         Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2272         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2273       }
2274     }
2275 
2276     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2277     if (isPowerOf2_32(BitWidth)) {
2278       APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2279       if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2280                                Depth + 1))
2281         return true;
2282     }
2283     break;
2284   }
2285   case ISD::SMIN:
2286   case ISD::SMAX:
2287   case ISD::UMIN:
2288   case ISD::UMAX: {
2289     unsigned Opc = Op.getOpcode();
2290     SDValue Op0 = Op.getOperand(0);
2291     SDValue Op1 = Op.getOperand(1);
2292 
2293     // If we're only demanding signbits, then we can simplify to OR/AND node.
2294     unsigned BitOp =
2295         (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2296     unsigned NumSignBits =
2297         std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2298                  TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2299     unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2300     if (NumSignBits >= NumDemandedUpperBits)
2301       return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2302 
2303     // Check if one arg is always less/greater than (or equal) to the other arg.
2304     KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2305     KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2306     switch (Opc) {
2307     case ISD::SMIN:
2308       if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2309         return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2310       if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2311         return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2312       Known = KnownBits::smin(Known0, Known1);
2313       break;
2314     case ISD::SMAX:
2315       if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2316         return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2317       if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2318         return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2319       Known = KnownBits::smax(Known0, Known1);
2320       break;
2321     case ISD::UMIN:
2322       if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2323         return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2324       if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2325         return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2326       Known = KnownBits::umin(Known0, Known1);
2327       break;
2328     case ISD::UMAX:
2329       if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2330         return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2331       if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2332         return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2333       Known = KnownBits::umax(Known0, Known1);
2334       break;
2335     }
2336     break;
2337   }
2338   case ISD::BITREVERSE: {
2339     SDValue Src = Op.getOperand(0);
2340     APInt DemandedSrcBits = DemandedBits.reverseBits();
2341     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2342                              Depth + 1))
2343       return true;
2344     Known.One = Known2.One.reverseBits();
2345     Known.Zero = Known2.Zero.reverseBits();
2346     break;
2347   }
2348   case ISD::BSWAP: {
2349     SDValue Src = Op.getOperand(0);
2350 
2351     // If the only bits demanded come from one byte of the bswap result,
2352     // just shift the input byte into position to eliminate the bswap.
2353     unsigned NLZ = DemandedBits.countl_zero();
2354     unsigned NTZ = DemandedBits.countr_zero();
2355 
2356     // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
2357     // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
2358     // have 14 leading zeros, round to 8.
2359     NLZ = alignDown(NLZ, 8);
2360     NTZ = alignDown(NTZ, 8);
2361     // If we need exactly one byte, we can do this transformation.
2362     if (BitWidth - NLZ - NTZ == 8) {
2363       // Replace this with either a left or right shift to get the byte into
2364       // the right place.
2365       unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2366       if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2367         unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2368         SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2369         SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2370         return TLO.CombineTo(Op, NewOp);
2371       }
2372     }
2373 
2374     APInt DemandedSrcBits = DemandedBits.byteSwap();
2375     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2376                              Depth + 1))
2377       return true;
2378     Known.One = Known2.One.byteSwap();
2379     Known.Zero = Known2.Zero.byteSwap();
2380     break;
2381   }
2382   case ISD::CTPOP: {
2383     // If only 1 bit is demanded, replace with PARITY as long as we're before
2384     // op legalization.
2385     // FIXME: Limit to scalars for now.
2386     if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2387       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2388                                                Op.getOperand(0)));
2389 
2390     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2391     break;
2392   }
2393   case ISD::SIGN_EXTEND_INREG: {
2394     SDValue Op0 = Op.getOperand(0);
2395     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2396     unsigned ExVTBits = ExVT.getScalarSizeInBits();
2397 
2398     // If we only care about the highest bit, don't bother shifting right.
2399     if (DemandedBits.isSignMask()) {
2400       unsigned MinSignedBits =
2401           TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2402       bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2403       // However if the input is already sign extended we expect the sign
2404       // extension to be dropped altogether later and do not simplify.
2405       if (!AlreadySignExtended) {
2406         // Compute the correct shift amount type, which must be getShiftAmountTy
2407         // for scalar types after legalization.
2408         SDValue ShiftAmt =
2409             TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2410         return TLO.CombineTo(Op,
2411                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2412       }
2413     }
2414 
2415     // If none of the extended bits are demanded, eliminate the sextinreg.
2416     if (DemandedBits.getActiveBits() <= ExVTBits)
2417       return TLO.CombineTo(Op, Op0);
2418 
2419     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2420 
2421     // Since the sign extended bits are demanded, we know that the sign
2422     // bit is demanded.
2423     InputDemandedBits.setBit(ExVTBits - 1);
2424 
2425     if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2426                              Depth + 1))
2427       return true;
2428 
2429     // If the sign bit of the input is known set or clear, then we know the
2430     // top bits of the result.
2431 
2432     // If the input sign bit is known zero, convert this into a zero extension.
2433     if (Known.Zero[ExVTBits - 1])
2434       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2435 
2436     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2437     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2438       Known.One.setBitsFrom(ExVTBits);
2439       Known.Zero &= Mask;
2440     } else { // Input sign bit unknown
2441       Known.Zero &= Mask;
2442       Known.One &= Mask;
2443     }
2444     break;
2445   }
2446   case ISD::BUILD_PAIR: {
2447     EVT HalfVT = Op.getOperand(0).getValueType();
2448     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2449 
2450     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2451     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2452 
2453     KnownBits KnownLo, KnownHi;
2454 
2455     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2456       return true;
2457 
2458     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2459       return true;
2460 
2461     Known = KnownHi.concat(KnownLo);
2462     break;
2463   }
2464   case ISD::ZERO_EXTEND_VECTOR_INREG:
2465     if (VT.isScalableVector())
2466       return false;
2467     [[fallthrough]];
2468   case ISD::ZERO_EXTEND: {
2469     SDValue Src = Op.getOperand(0);
2470     EVT SrcVT = Src.getValueType();
2471     unsigned InBits = SrcVT.getScalarSizeInBits();
2472     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2473     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2474 
2475     // If none of the top bits are demanded, convert this into an any_extend.
2476     if (DemandedBits.getActiveBits() <= InBits) {
2477       // If we only need the non-extended bits of the bottom element
2478       // then we can just bitcast to the result.
2479       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2480           VT.getSizeInBits() == SrcVT.getSizeInBits())
2481         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2482 
2483       unsigned Opc =
2484           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2485       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2486         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2487     }
2488 
2489     APInt InDemandedBits = DemandedBits.trunc(InBits);
2490     APInt InDemandedElts = DemandedElts.zext(InElts);
2491     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2492                              Depth + 1)) {
2493       Op->dropFlags(SDNodeFlags::NonNeg);
2494       return true;
2495     }
2496     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2497     Known = Known.zext(BitWidth);
2498 
2499     // Attempt to avoid multi-use ops if we don't need anything from them.
2500     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2501             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2502       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2503     break;
2504   }
2505   case ISD::SIGN_EXTEND_VECTOR_INREG:
2506     if (VT.isScalableVector())
2507       return false;
2508     [[fallthrough]];
2509   case ISD::SIGN_EXTEND: {
2510     SDValue Src = Op.getOperand(0);
2511     EVT SrcVT = Src.getValueType();
2512     unsigned InBits = SrcVT.getScalarSizeInBits();
2513     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2514     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2515 
2516     APInt InDemandedElts = DemandedElts.zext(InElts);
2517     APInt InDemandedBits = DemandedBits.trunc(InBits);
2518 
2519     // Since some of the sign extended bits are demanded, we know that the sign
2520     // bit is demanded.
2521     InDemandedBits.setBit(InBits - 1);
2522 
2523     // If none of the top bits are demanded, convert this into an any_extend.
2524     if (DemandedBits.getActiveBits() <= InBits) {
2525       // If we only need the non-extended bits of the bottom element
2526       // then we can just bitcast to the result.
2527       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2528           VT.getSizeInBits() == SrcVT.getSizeInBits())
2529         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2530 
2531       // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2532       if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2533           TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2534               InBits) {
2535         unsigned Opc =
2536             IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2537         if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2538           return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2539       }
2540     }
2541 
2542     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2543                              Depth + 1))
2544       return true;
2545     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2546 
2547     // If the sign bit is known one, the top bits match.
2548     Known = Known.sext(BitWidth);
2549 
2550     // If the sign bit is known zero, convert this to a zero extend.
2551     if (Known.isNonNegative()) {
2552       unsigned Opc =
2553           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2554       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2555         SDNodeFlags Flags;
2556         if (!IsVecInReg)
2557           Flags |= SDNodeFlags::NonNeg;
2558         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2559       }
2560     }
2561 
2562     // Attempt to avoid multi-use ops if we don't need anything from them.
2563     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2564             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2565       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2566     break;
2567   }
2568   case ISD::ANY_EXTEND_VECTOR_INREG:
2569     if (VT.isScalableVector())
2570       return false;
2571     [[fallthrough]];
2572   case ISD::ANY_EXTEND: {
2573     SDValue Src = Op.getOperand(0);
2574     EVT SrcVT = Src.getValueType();
2575     unsigned InBits = SrcVT.getScalarSizeInBits();
2576     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2577     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2578 
2579     // If we only need the bottom element then we can just bitcast.
2580     // TODO: Handle ANY_EXTEND?
2581     if (IsLE && IsVecInReg && DemandedElts == 1 &&
2582         VT.getSizeInBits() == SrcVT.getSizeInBits())
2583       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2584 
2585     APInt InDemandedBits = DemandedBits.trunc(InBits);
2586     APInt InDemandedElts = DemandedElts.zext(InElts);
2587     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2588                              Depth + 1))
2589       return true;
2590     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2591     Known = Known.anyext(BitWidth);
2592 
2593     // Attempt to avoid multi-use ops if we don't need anything from them.
2594     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2595             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2596       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2597     break;
2598   }
2599   case ISD::TRUNCATE: {
2600     SDValue Src = Op.getOperand(0);
2601 
2602     // Simplify the input, using demanded bit information, and compute the known
2603     // zero/one bits live out.
2604     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2605     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2606     if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2607                              Depth + 1)) {
2608       // Disable the nsw and nuw flags. We can no longer guarantee that we
2609       // won't wrap after simplification.
2610       Op->dropFlags(SDNodeFlags::NoWrap);
2611       return true;
2612     }
2613     Known = Known.trunc(BitWidth);
2614 
2615     // Attempt to avoid multi-use ops if we don't need anything from them.
2616     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2617             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2618       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2619 
2620     // If the input is only used by this truncate, see if we can shrink it based
2621     // on the known demanded bits.
2622     switch (Src.getOpcode()) {
2623     default:
2624       break;
2625     case ISD::SRL:
2626       // Shrink SRL by a constant if none of the high bits shifted in are
2627       // demanded.
2628       if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2629         // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2630         // undesirable.
2631         break;
2632 
2633       if (Src.getNode()->hasOneUse()) {
2634         if (isTruncateFree(Src, VT) &&
2635             !isTruncateFree(Src.getValueType(), VT)) {
2636           // If truncate is only free at trunc(srl), do not turn it into
2637           // srl(trunc). The check is done by first check the truncate is free
2638           // at Src's opcode(srl), then check the truncate is not done by
2639           // referencing sub-register. In test, if both trunc(srl) and
2640           // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2641           // trunc(srl)'s trunc is free, trunc(srl) is better.
2642           break;
2643         }
2644 
2645         std::optional<uint64_t> ShAmtC =
2646             TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2647         if (!ShAmtC || *ShAmtC >= BitWidth)
2648           break;
2649         uint64_t ShVal = *ShAmtC;
2650 
2651         APInt HighBits =
2652             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2653         HighBits.lshrInPlace(ShVal);
2654         HighBits = HighBits.trunc(BitWidth);
2655         if (!(HighBits & DemandedBits)) {
2656           // None of the shifted in bits are needed.  Add a truncate of the
2657           // shift input, then shift it.
2658           SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2659           SDValue NewTrunc =
2660               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2661           return TLO.CombineTo(
2662               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2663         }
2664       }
2665       break;
2666     }
2667 
2668     break;
2669   }
2670   case ISD::AssertZext: {
2671     // AssertZext demands all of the high bits, plus any of the low bits
2672     // demanded by its users.
2673     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2674     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2675     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2676                              TLO, Depth + 1))
2677       return true;
2678 
2679     Known.Zero |= ~InMask;
2680     Known.One &= (~Known.Zero);
2681     break;
2682   }
2683   case ISD::EXTRACT_VECTOR_ELT: {
2684     SDValue Src = Op.getOperand(0);
2685     SDValue Idx = Op.getOperand(1);
2686     ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2687     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2688 
2689     if (SrcEltCnt.isScalable())
2690       return false;
2691 
2692     // Demand the bits from every vector element without a constant index.
2693     unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2694     APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2695     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2696       if (CIdx->getAPIntValue().ult(NumSrcElts))
2697         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2698 
2699     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2700     // anything about the extended bits.
2701     APInt DemandedSrcBits = DemandedBits;
2702     if (BitWidth > EltBitWidth)
2703       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2704 
2705     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2706                              Depth + 1))
2707       return true;
2708 
2709     // Attempt to avoid multi-use ops if we don't need anything from them.
2710     if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2711       if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2712               Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2713         SDValue NewOp =
2714             TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2715         return TLO.CombineTo(Op, NewOp);
2716       }
2717     }
2718 
2719     Known = Known2;
2720     if (BitWidth > EltBitWidth)
2721       Known = Known.anyext(BitWidth);
2722     break;
2723   }
2724   case ISD::BITCAST: {
2725     if (VT.isScalableVector())
2726       return false;
2727     SDValue Src = Op.getOperand(0);
2728     EVT SrcVT = Src.getValueType();
2729     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2730 
2731     // If this is an FP->Int bitcast and if the sign bit is the only
2732     // thing demanded, turn this into a FGETSIGN.
2733     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2734         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2735         SrcVT.isFloatingPoint()) {
2736       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2737       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2738       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2739           SrcVT != MVT::f128) {
2740         // Cannot eliminate/lower SHL for f128 yet.
2741         EVT Ty = OpVTLegal ? VT : MVT::i32;
2742         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2743         // place.  We expect the SHL to be eliminated by other optimizations.
2744         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2745         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2746         if (!OpVTLegal && OpVTSizeInBits > 32)
2747           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2748         unsigned ShVal = Op.getValueSizeInBits() - 1;
2749         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2750         return TLO.CombineTo(Op,
2751                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2752       }
2753     }
2754 
2755     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2756     // Demand the elt/bit if any of the original elts/bits are demanded.
2757     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2758       unsigned Scale = BitWidth / NumSrcEltBits;
2759       unsigned NumSrcElts = SrcVT.getVectorNumElements();
2760       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2761       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2762       for (unsigned i = 0; i != Scale; ++i) {
2763         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2764         unsigned BitOffset = EltOffset * NumSrcEltBits;
2765         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2766         if (!Sub.isZero()) {
2767           DemandedSrcBits |= Sub;
2768           for (unsigned j = 0; j != NumElts; ++j)
2769             if (DemandedElts[j])
2770               DemandedSrcElts.setBit((j * Scale) + i);
2771         }
2772       }
2773 
2774       APInt KnownSrcUndef, KnownSrcZero;
2775       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2776                                      KnownSrcZero, TLO, Depth + 1))
2777         return true;
2778 
2779       KnownBits KnownSrcBits;
2780       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2781                                KnownSrcBits, TLO, Depth + 1))
2782         return true;
2783     } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2784       // TODO - bigendian once we have test coverage.
2785       unsigned Scale = NumSrcEltBits / BitWidth;
2786       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2787       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2788       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2789       for (unsigned i = 0; i != NumElts; ++i)
2790         if (DemandedElts[i]) {
2791           unsigned Offset = (i % Scale) * BitWidth;
2792           DemandedSrcBits.insertBits(DemandedBits, Offset);
2793           DemandedSrcElts.setBit(i / Scale);
2794         }
2795 
2796       if (SrcVT.isVector()) {
2797         APInt KnownSrcUndef, KnownSrcZero;
2798         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2799                                        KnownSrcZero, TLO, Depth + 1))
2800           return true;
2801       }
2802 
2803       KnownBits KnownSrcBits;
2804       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2805                                KnownSrcBits, TLO, Depth + 1))
2806         return true;
2807 
2808       // Attempt to avoid multi-use ops if we don't need anything from them.
2809       if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2810         if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2811                 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2812           SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2813           return TLO.CombineTo(Op, NewOp);
2814         }
2815       }
2816     }
2817 
2818     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2819     // recursive call where Known may be useful to the caller.
2820     if (Depth > 0) {
2821       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2822       return false;
2823     }
2824     break;
2825   }
2826   case ISD::MUL:
2827     if (DemandedBits.isPowerOf2()) {
2828       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2829       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2830       // odd (has LSB set), then the left-shifted low bit of X is the answer.
2831       unsigned CTZ = DemandedBits.countr_zero();
2832       ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2833       if (C && C->getAPIntValue().countr_zero() == CTZ) {
2834         SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2835         SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2836         return TLO.CombineTo(Op, Shl);
2837       }
2838     }
2839     // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2840     // X * X is odd iff X is odd.
2841     // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2842     if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2843       SDValue One = TLO.DAG.getConstant(1, dl, VT);
2844       SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2845       return TLO.CombineTo(Op, And1);
2846     }
2847     [[fallthrough]];
2848   case ISD::ADD:
2849   case ISD::SUB: {
2850     // Add, Sub, and Mul don't demand any bits in positions beyond that
2851     // of the highest bit demanded of them.
2852     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2853     SDNodeFlags Flags = Op.getNode()->getFlags();
2854     unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2855     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2856     KnownBits KnownOp0, KnownOp1;
2857     auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2858                                       const KnownBits &KnownRHS) {
2859       if (Op.getOpcode() == ISD::MUL)
2860         Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2861       return Demanded;
2862     };
2863     if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2864                              Depth + 1) ||
2865         SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2866                              DemandedElts, KnownOp0, TLO, Depth + 1) ||
2867         // See if the operation should be performed at a smaller bit width.
2868         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2869       // Disable the nsw and nuw flags. We can no longer guarantee that we
2870       // won't wrap after simplification.
2871       Op->dropFlags(SDNodeFlags::NoWrap);
2872       return true;
2873     }
2874 
2875     // neg x with only low bit demanded is simply x.
2876     if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2877         isNullConstant(Op0))
2878       return TLO.CombineTo(Op, Op1);
2879 
2880     // Attempt to avoid multi-use ops if we don't need anything from them.
2881     if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2882       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2883           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2884       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2885           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2886       if (DemandedOp0 || DemandedOp1) {
2887         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2888         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2889         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2890                                         Flags & ~SDNodeFlags::NoWrap);
2891         return TLO.CombineTo(Op, NewOp);
2892       }
2893     }
2894 
2895     // If we have a constant operand, we may be able to turn it into -1 if we
2896     // do not demand the high bits. This can make the constant smaller to
2897     // encode, allow more general folding, or match specialized instruction
2898     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2899     // is probably not useful (and could be detrimental).
2900     ConstantSDNode *C = isConstOrConstSplat(Op1);
2901     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2902     if (C && !C->isAllOnes() && !C->isOne() &&
2903         (C->getAPIntValue() | HighMask).isAllOnes()) {
2904       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2905       // Disable the nsw and nuw flags. We can no longer guarantee that we
2906       // won't wrap after simplification.
2907       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2908                                       Flags & ~SDNodeFlags::NoWrap);
2909       return TLO.CombineTo(Op, NewOp);
2910     }
2911 
2912     // Match a multiply with a disguised negated-power-of-2 and convert to a
2913     // an equivalent shift-left amount.
2914     // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2915     auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2916       if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2917         return 0;
2918 
2919       // Don't touch opaque constants. Also, ignore zero and power-of-2
2920       // multiplies. Those will get folded later.
2921       ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2922       if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2923           !MulC->getAPIntValue().isPowerOf2()) {
2924         APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2925         if (UnmaskedC.isNegatedPowerOf2())
2926           return (-UnmaskedC).logBase2();
2927       }
2928       return 0;
2929     };
2930 
2931     auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2932                        unsigned ShlAmt) {
2933       SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2934       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2935       SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2936       return TLO.CombineTo(Op, Res);
2937     };
2938 
2939     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2940       if (Op.getOpcode() == ISD::ADD) {
2941         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2942         if (unsigned ShAmt = getShiftLeftAmt(Op0))
2943           return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2944         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2945         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2946           return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2947       }
2948       if (Op.getOpcode() == ISD::SUB) {
2949         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2950         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2951           return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2952       }
2953     }
2954 
2955     if (Op.getOpcode() == ISD::MUL) {
2956       Known = KnownBits::mul(KnownOp0, KnownOp1);
2957     } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2958       Known = KnownBits::computeForAddSub(
2959           Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2960           Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2961     }
2962     break;
2963   }
2964   default:
2965     // We also ask the target about intrinsics (which could be specific to it).
2966     if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2967         Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2968       // TODO: Probably okay to remove after audit; here to reduce change size
2969       // in initial enablement patch for scalable vectors
2970       if (Op.getValueType().isScalableVector())
2971         break;
2972       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2973                                             Known, TLO, Depth))
2974         return true;
2975       break;
2976     }
2977 
2978     // Just use computeKnownBits to compute output bits.
2979     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2980     break;
2981   }
2982 
2983   // If we know the value of all of the demanded bits, return this as a
2984   // constant.
2985   if (!isTargetCanonicalConstantNode(Op) &&
2986       DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2987     // Avoid folding to a constant if any OpaqueConstant is involved.
2988     if (llvm::any_of(Op->ops(), [](SDValue V) {
2989           auto *C = dyn_cast<ConstantSDNode>(V);
2990           return C && C->isOpaque();
2991         }))
2992       return false;
2993     if (VT.isInteger())
2994       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2995     if (VT.isFloatingPoint())
2996       return TLO.CombineTo(
2997           Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2998                                     dl, VT));
2999   }
3000 
3001   // A multi use 'all demanded elts' simplify failed to find any knownbits.
3002   // Try again just for the original demanded elts.
3003   // Ensure we do this AFTER constant folding above.
3004   if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3005     Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3006 
3007   return false;
3008 }
3009 
3010 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3011                                                 const APInt &DemandedElts,
3012                                                 DAGCombinerInfo &DCI) const {
3013   SelectionDAG &DAG = DCI.DAG;
3014   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3015                         !DCI.isBeforeLegalizeOps());
3016 
3017   APInt KnownUndef, KnownZero;
3018   bool Simplified =
3019       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3020   if (Simplified) {
3021     DCI.AddToWorklist(Op.getNode());
3022     DCI.CommitTargetLoweringOpt(TLO);
3023   }
3024 
3025   return Simplified;
3026 }
3027 
3028 /// Given a vector binary operation and known undefined elements for each input
3029 /// operand, compute whether each element of the output is undefined.
3030 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3031                                          const APInt &UndefOp0,
3032                                          const APInt &UndefOp1) {
3033   EVT VT = BO.getValueType();
3034   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3035          "Vector binop only");
3036 
3037   EVT EltVT = VT.getVectorElementType();
3038   unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3039   assert(UndefOp0.getBitWidth() == NumElts &&
3040          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3041 
3042   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3043                                    const APInt &UndefVals) {
3044     if (UndefVals[Index])
3045       return DAG.getUNDEF(EltVT);
3046 
3047     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3048       // Try hard to make sure that the getNode() call is not creating temporary
3049       // nodes. Ignore opaque integers because they do not constant fold.
3050       SDValue Elt = BV->getOperand(Index);
3051       auto *C = dyn_cast<ConstantSDNode>(Elt);
3052       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3053         return Elt;
3054     }
3055 
3056     return SDValue();
3057   };
3058 
3059   APInt KnownUndef = APInt::getZero(NumElts);
3060   for (unsigned i = 0; i != NumElts; ++i) {
3061     // If both inputs for this element are either constant or undef and match
3062     // the element type, compute the constant/undef result for this element of
3063     // the vector.
3064     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3065     // not handle FP constants. The code within getNode() should be refactored
3066     // to avoid the danger of creating a bogus temporary node here.
3067     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3068     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3069     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3070       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3071         KnownUndef.setBit(i);
3072   }
3073   return KnownUndef;
3074 }
3075 
3076 bool TargetLowering::SimplifyDemandedVectorElts(
3077     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3078     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3079     bool AssumeSingleUse) const {
3080   EVT VT = Op.getValueType();
3081   unsigned Opcode = Op.getOpcode();
3082   APInt DemandedElts = OriginalDemandedElts;
3083   unsigned NumElts = DemandedElts.getBitWidth();
3084   assert(VT.isVector() && "Expected vector op");
3085 
3086   KnownUndef = KnownZero = APInt::getZero(NumElts);
3087 
3088   if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3089     return false;
3090 
3091   // TODO: For now we assume we know nothing about scalable vectors.
3092   if (VT.isScalableVector())
3093     return false;
3094 
3095   assert(VT.getVectorNumElements() == NumElts &&
3096          "Mask size mismatches value type element count!");
3097 
3098   // Undef operand.
3099   if (Op.isUndef()) {
3100     KnownUndef.setAllBits();
3101     return false;
3102   }
3103 
3104   // If Op has other users, assume that all elements are needed.
3105   if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3106     DemandedElts.setAllBits();
3107 
3108   // Not demanding any elements from Op.
3109   if (DemandedElts == 0) {
3110     KnownUndef.setAllBits();
3111     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3112   }
3113 
3114   // Limit search depth.
3115   if (Depth >= SelectionDAG::MaxRecursionDepth)
3116     return false;
3117 
3118   SDLoc DL(Op);
3119   unsigned EltSizeInBits = VT.getScalarSizeInBits();
3120   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3121 
3122   // Helper for demanding the specified elements and all the bits of both binary
3123   // operands.
3124   auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3125     SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3126                                                            TLO.DAG, Depth + 1);
3127     SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3128                                                            TLO.DAG, Depth + 1);
3129     if (NewOp0 || NewOp1) {
3130       SDValue NewOp =
3131           TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3132                           NewOp1 ? NewOp1 : Op1, Op->getFlags());
3133       return TLO.CombineTo(Op, NewOp);
3134     }
3135     return false;
3136   };
3137 
3138   switch (Opcode) {
3139   case ISD::SCALAR_TO_VECTOR: {
3140     if (!DemandedElts[0]) {
3141       KnownUndef.setAllBits();
3142       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3143     }
3144     SDValue ScalarSrc = Op.getOperand(0);
3145     if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3146       SDValue Src = ScalarSrc.getOperand(0);
3147       SDValue Idx = ScalarSrc.getOperand(1);
3148       EVT SrcVT = Src.getValueType();
3149 
3150       ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3151 
3152       if (SrcEltCnt.isScalable())
3153         return false;
3154 
3155       unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3156       if (isNullConstant(Idx)) {
3157         APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3158         APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3159         APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3160         if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3161                                        TLO, Depth + 1))
3162           return true;
3163       }
3164     }
3165     KnownUndef.setHighBits(NumElts - 1);
3166     break;
3167   }
3168   case ISD::BITCAST: {
3169     SDValue Src = Op.getOperand(0);
3170     EVT SrcVT = Src.getValueType();
3171 
3172     // We only handle vectors here.
3173     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3174     if (!SrcVT.isVector())
3175       break;
3176 
3177     // Fast handling of 'identity' bitcasts.
3178     unsigned NumSrcElts = SrcVT.getVectorNumElements();
3179     if (NumSrcElts == NumElts)
3180       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3181                                         KnownZero, TLO, Depth + 1);
3182 
3183     APInt SrcDemandedElts, SrcZero, SrcUndef;
3184 
3185     // Bitcast from 'large element' src vector to 'small element' vector, we
3186     // must demand a source element if any DemandedElt maps to it.
3187     if ((NumElts % NumSrcElts) == 0) {
3188       unsigned Scale = NumElts / NumSrcElts;
3189       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3190       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3191                                      TLO, Depth + 1))
3192         return true;
3193 
3194       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3195       // of the large element.
3196       // TODO - bigendian once we have test coverage.
3197       if (IsLE) {
3198         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3199         APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3200         for (unsigned i = 0; i != NumElts; ++i)
3201           if (DemandedElts[i]) {
3202             unsigned Ofs = (i % Scale) * EltSizeInBits;
3203             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3204           }
3205 
3206         KnownBits Known;
3207         if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3208                                  TLO, Depth + 1))
3209           return true;
3210 
3211         // The bitcast has split each wide element into a number of
3212         // narrow subelements. We have just computed the Known bits
3213         // for wide elements. See if element splitting results in
3214         // some subelements being zero. Only for demanded elements!
3215         for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3216           if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3217                    .isAllOnes())
3218             continue;
3219           for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3220             unsigned Elt = Scale * SrcElt + SubElt;
3221             if (DemandedElts[Elt])
3222               KnownZero.setBit(Elt);
3223           }
3224         }
3225       }
3226 
3227       // If the src element is zero/undef then all the output elements will be -
3228       // only demanded elements are guaranteed to be correct.
3229       for (unsigned i = 0; i != NumSrcElts; ++i) {
3230         if (SrcDemandedElts[i]) {
3231           if (SrcZero[i])
3232             KnownZero.setBits(i * Scale, (i + 1) * Scale);
3233           if (SrcUndef[i])
3234             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3235         }
3236       }
3237     }
3238 
3239     // Bitcast from 'small element' src vector to 'large element' vector, we
3240     // demand all smaller source elements covered by the larger demanded element
3241     // of this vector.
3242     if ((NumSrcElts % NumElts) == 0) {
3243       unsigned Scale = NumSrcElts / NumElts;
3244       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3245       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3246                                      TLO, Depth + 1))
3247         return true;
3248 
3249       // If all the src elements covering an output element are zero/undef, then
3250       // the output element will be as well, assuming it was demanded.
3251       for (unsigned i = 0; i != NumElts; ++i) {
3252         if (DemandedElts[i]) {
3253           if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3254             KnownZero.setBit(i);
3255           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3256             KnownUndef.setBit(i);
3257         }
3258       }
3259     }
3260     break;
3261   }
3262   case ISD::FREEZE: {
3263     SDValue N0 = Op.getOperand(0);
3264     if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3265                                                  /*PoisonOnly=*/false))
3266       return TLO.CombineTo(Op, N0);
3267 
3268     // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3269     // freeze(op(x, ...)) -> op(freeze(x), ...).
3270     if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3271       return TLO.CombineTo(
3272           Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3273                               TLO.DAG.getFreeze(N0.getOperand(0))));
3274     break;
3275   }
3276   case ISD::BUILD_VECTOR: {
3277     // Check all elements and simplify any unused elements with UNDEF.
3278     if (!DemandedElts.isAllOnes()) {
3279       // Don't simplify BROADCASTS.
3280       if (llvm::any_of(Op->op_values(),
3281                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3282         SmallVector<SDValue, 32> Ops(Op->ops());
3283         bool Updated = false;
3284         for (unsigned i = 0; i != NumElts; ++i) {
3285           if (!DemandedElts[i] && !Ops[i].isUndef()) {
3286             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3287             KnownUndef.setBit(i);
3288             Updated = true;
3289           }
3290         }
3291         if (Updated)
3292           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3293       }
3294     }
3295     for (unsigned i = 0; i != NumElts; ++i) {
3296       SDValue SrcOp = Op.getOperand(i);
3297       if (SrcOp.isUndef()) {
3298         KnownUndef.setBit(i);
3299       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3300                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3301         KnownZero.setBit(i);
3302       }
3303     }
3304     break;
3305   }
3306   case ISD::CONCAT_VECTORS: {
3307     EVT SubVT = Op.getOperand(0).getValueType();
3308     unsigned NumSubVecs = Op.getNumOperands();
3309     unsigned NumSubElts = SubVT.getVectorNumElements();
3310     for (unsigned i = 0; i != NumSubVecs; ++i) {
3311       SDValue SubOp = Op.getOperand(i);
3312       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3313       APInt SubUndef, SubZero;
3314       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3315                                      Depth + 1))
3316         return true;
3317       KnownUndef.insertBits(SubUndef, i * NumSubElts);
3318       KnownZero.insertBits(SubZero, i * NumSubElts);
3319     }
3320 
3321     // Attempt to avoid multi-use ops if we don't need anything from them.
3322     if (!DemandedElts.isAllOnes()) {
3323       bool FoundNewSub = false;
3324       SmallVector<SDValue, 2> DemandedSubOps;
3325       for (unsigned i = 0; i != NumSubVecs; ++i) {
3326         SDValue SubOp = Op.getOperand(i);
3327         APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3328         SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3329             SubOp, SubElts, TLO.DAG, Depth + 1);
3330         DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3331         FoundNewSub = NewSubOp ? true : FoundNewSub;
3332       }
3333       if (FoundNewSub) {
3334         SDValue NewOp =
3335             TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3336         return TLO.CombineTo(Op, NewOp);
3337       }
3338     }
3339     break;
3340   }
3341   case ISD::INSERT_SUBVECTOR: {
3342     // Demand any elements from the subvector and the remainder from the src its
3343     // inserted into.
3344     SDValue Src = Op.getOperand(0);
3345     SDValue Sub = Op.getOperand(1);
3346     uint64_t Idx = Op.getConstantOperandVal(2);
3347     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3348     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3349     APInt DemandedSrcElts = DemandedElts;
3350     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3351 
3352     APInt SubUndef, SubZero;
3353     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3354                                    Depth + 1))
3355       return true;
3356 
3357     // If none of the src operand elements are demanded, replace it with undef.
3358     if (!DemandedSrcElts && !Src.isUndef())
3359       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3360                                                TLO.DAG.getUNDEF(VT), Sub,
3361                                                Op.getOperand(2)));
3362 
3363     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3364                                    TLO, Depth + 1))
3365       return true;
3366     KnownUndef.insertBits(SubUndef, Idx);
3367     KnownZero.insertBits(SubZero, Idx);
3368 
3369     // Attempt to avoid multi-use ops if we don't need anything from them.
3370     if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3371       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3372           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3373       SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3374           Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3375       if (NewSrc || NewSub) {
3376         NewSrc = NewSrc ? NewSrc : Src;
3377         NewSub = NewSub ? NewSub : Sub;
3378         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3379                                         NewSub, Op.getOperand(2));
3380         return TLO.CombineTo(Op, NewOp);
3381       }
3382     }
3383     break;
3384   }
3385   case ISD::EXTRACT_SUBVECTOR: {
3386     // Offset the demanded elts by the subvector index.
3387     SDValue Src = Op.getOperand(0);
3388     if (Src.getValueType().isScalableVector())
3389       break;
3390     uint64_t Idx = Op.getConstantOperandVal(1);
3391     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3392     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3393 
3394     APInt SrcUndef, SrcZero;
3395     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3396                                    Depth + 1))
3397       return true;
3398     KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3399     KnownZero = SrcZero.extractBits(NumElts, Idx);
3400 
3401     // Attempt to avoid multi-use ops if we don't need anything from them.
3402     if (!DemandedElts.isAllOnes()) {
3403       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3404           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3405       if (NewSrc) {
3406         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3407                                         Op.getOperand(1));
3408         return TLO.CombineTo(Op, NewOp);
3409       }
3410     }
3411     break;
3412   }
3413   case ISD::INSERT_VECTOR_ELT: {
3414     SDValue Vec = Op.getOperand(0);
3415     SDValue Scl = Op.getOperand(1);
3416     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3417 
3418     // For a legal, constant insertion index, if we don't need this insertion
3419     // then strip it, else remove it from the demanded elts.
3420     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3421       unsigned Idx = CIdx->getZExtValue();
3422       if (!DemandedElts[Idx])
3423         return TLO.CombineTo(Op, Vec);
3424 
3425       APInt DemandedVecElts(DemandedElts);
3426       DemandedVecElts.clearBit(Idx);
3427       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3428                                      KnownZero, TLO, Depth + 1))
3429         return true;
3430 
3431       KnownUndef.setBitVal(Idx, Scl.isUndef());
3432 
3433       KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3434       break;
3435     }
3436 
3437     APInt VecUndef, VecZero;
3438     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3439                                    Depth + 1))
3440       return true;
3441     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3442     break;
3443   }
3444   case ISD::VSELECT: {
3445     SDValue Sel = Op.getOperand(0);
3446     SDValue LHS = Op.getOperand(1);
3447     SDValue RHS = Op.getOperand(2);
3448 
3449     // Try to transform the select condition based on the current demanded
3450     // elements.
3451     APInt UndefSel, ZeroSel;
3452     if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3453                                    Depth + 1))
3454       return true;
3455 
3456     // See if we can simplify either vselect operand.
3457     APInt DemandedLHS(DemandedElts);
3458     APInt DemandedRHS(DemandedElts);
3459     APInt UndefLHS, ZeroLHS;
3460     APInt UndefRHS, ZeroRHS;
3461     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3462                                    Depth + 1))
3463       return true;
3464     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3465                                    Depth + 1))
3466       return true;
3467 
3468     KnownUndef = UndefLHS & UndefRHS;
3469     KnownZero = ZeroLHS & ZeroRHS;
3470 
3471     // If we know that the selected element is always zero, we don't need the
3472     // select value element.
3473     APInt DemandedSel = DemandedElts & ~KnownZero;
3474     if (DemandedSel != DemandedElts)
3475       if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3476                                      Depth + 1))
3477         return true;
3478 
3479     break;
3480   }
3481   case ISD::VECTOR_SHUFFLE: {
3482     SDValue LHS = Op.getOperand(0);
3483     SDValue RHS = Op.getOperand(1);
3484     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3485 
3486     // Collect demanded elements from shuffle operands..
3487     APInt DemandedLHS(NumElts, 0);
3488     APInt DemandedRHS(NumElts, 0);
3489     for (unsigned i = 0; i != NumElts; ++i) {
3490       int M = ShuffleMask[i];
3491       if (M < 0 || !DemandedElts[i])
3492         continue;
3493       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3494       if (M < (int)NumElts)
3495         DemandedLHS.setBit(M);
3496       else
3497         DemandedRHS.setBit(M - NumElts);
3498     }
3499 
3500     // See if we can simplify either shuffle operand.
3501     APInt UndefLHS, ZeroLHS;
3502     APInt UndefRHS, ZeroRHS;
3503     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3504                                    Depth + 1))
3505       return true;
3506     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3507                                    Depth + 1))
3508       return true;
3509 
3510     // Simplify mask using undef elements from LHS/RHS.
3511     bool Updated = false;
3512     bool IdentityLHS = true, IdentityRHS = true;
3513     SmallVector<int, 32> NewMask(ShuffleMask);
3514     for (unsigned i = 0; i != NumElts; ++i) {
3515       int &M = NewMask[i];
3516       if (M < 0)
3517         continue;
3518       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3519           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3520         Updated = true;
3521         M = -1;
3522       }
3523       IdentityLHS &= (M < 0) || (M == (int)i);
3524       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3525     }
3526 
3527     // Update legal shuffle masks based on demanded elements if it won't reduce
3528     // to Identity which can cause premature removal of the shuffle mask.
3529     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3530       SDValue LegalShuffle =
3531           buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3532       if (LegalShuffle)
3533         return TLO.CombineTo(Op, LegalShuffle);
3534     }
3535 
3536     // Propagate undef/zero elements from LHS/RHS.
3537     for (unsigned i = 0; i != NumElts; ++i) {
3538       int M = ShuffleMask[i];
3539       if (M < 0) {
3540         KnownUndef.setBit(i);
3541       } else if (M < (int)NumElts) {
3542         if (UndefLHS[M])
3543           KnownUndef.setBit(i);
3544         if (ZeroLHS[M])
3545           KnownZero.setBit(i);
3546       } else {
3547         if (UndefRHS[M - NumElts])
3548           KnownUndef.setBit(i);
3549         if (ZeroRHS[M - NumElts])
3550           KnownZero.setBit(i);
3551       }
3552     }
3553     break;
3554   }
3555   case ISD::ANY_EXTEND_VECTOR_INREG:
3556   case ISD::SIGN_EXTEND_VECTOR_INREG:
3557   case ISD::ZERO_EXTEND_VECTOR_INREG: {
3558     APInt SrcUndef, SrcZero;
3559     SDValue Src = Op.getOperand(0);
3560     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3561     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3562     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3563                                    Depth + 1))
3564       return true;
3565     KnownZero = SrcZero.zextOrTrunc(NumElts);
3566     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3567 
3568     if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3569         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3570         DemandedSrcElts == 1) {
3571       // aext - if we just need the bottom element then we can bitcast.
3572       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3573     }
3574 
3575     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3576       // zext(undef) upper bits are guaranteed to be zero.
3577       if (DemandedElts.isSubsetOf(KnownUndef))
3578         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3579       KnownUndef.clearAllBits();
3580 
3581       // zext - if we just need the bottom element then we can mask:
3582       // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3583       if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3584           Op->isOnlyUserOf(Src.getNode()) &&
3585           Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3586         SDLoc DL(Op);
3587         EVT SrcVT = Src.getValueType();
3588         EVT SrcSVT = SrcVT.getScalarType();
3589         SmallVector<SDValue> MaskElts;
3590         MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3591         MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3592         SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3593         if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3594                 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3595           Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3596           return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3597         }
3598       }
3599     }
3600     break;
3601   }
3602 
3603   // TODO: There are more binop opcodes that could be handled here - MIN,
3604   // MAX, saturated math, etc.
3605   case ISD::ADD: {
3606     SDValue Op0 = Op.getOperand(0);
3607     SDValue Op1 = Op.getOperand(1);
3608     if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3609       APInt UndefLHS, ZeroLHS;
3610       if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3611                                      Depth + 1, /*AssumeSingleUse*/ true))
3612         return true;
3613     }
3614     [[fallthrough]];
3615   }
3616   case ISD::AVGCEILS:
3617   case ISD::AVGCEILU:
3618   case ISD::AVGFLOORS:
3619   case ISD::AVGFLOORU:
3620   case ISD::OR:
3621   case ISD::XOR:
3622   case ISD::SUB:
3623   case ISD::FADD:
3624   case ISD::FSUB:
3625   case ISD::FMUL:
3626   case ISD::FDIV:
3627   case ISD::FREM: {
3628     SDValue Op0 = Op.getOperand(0);
3629     SDValue Op1 = Op.getOperand(1);
3630 
3631     APInt UndefRHS, ZeroRHS;
3632     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3633                                    Depth + 1))
3634       return true;
3635     APInt UndefLHS, ZeroLHS;
3636     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3637                                    Depth + 1))
3638       return true;
3639 
3640     KnownZero = ZeroLHS & ZeroRHS;
3641     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3642 
3643     // Attempt to avoid multi-use ops if we don't need anything from them.
3644     // TODO - use KnownUndef to relax the demandedelts?
3645     if (!DemandedElts.isAllOnes())
3646       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3647         return true;
3648     break;
3649   }
3650   case ISD::SHL:
3651   case ISD::SRL:
3652   case ISD::SRA:
3653   case ISD::ROTL:
3654   case ISD::ROTR: {
3655     SDValue Op0 = Op.getOperand(0);
3656     SDValue Op1 = Op.getOperand(1);
3657 
3658     APInt UndefRHS, ZeroRHS;
3659     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3660                                    Depth + 1))
3661       return true;
3662     APInt UndefLHS, ZeroLHS;
3663     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3664                                    Depth + 1))
3665       return true;
3666 
3667     KnownZero = ZeroLHS;
3668     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3669 
3670     // Attempt to avoid multi-use ops if we don't need anything from them.
3671     // TODO - use KnownUndef to relax the demandedelts?
3672     if (!DemandedElts.isAllOnes())
3673       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3674         return true;
3675     break;
3676   }
3677   case ISD::MUL:
3678   case ISD::MULHU:
3679   case ISD::MULHS:
3680   case ISD::AND: {
3681     SDValue Op0 = Op.getOperand(0);
3682     SDValue Op1 = Op.getOperand(1);
3683 
3684     APInt SrcUndef, SrcZero;
3685     if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3686                                    Depth + 1))
3687       return true;
3688     // If we know that a demanded element was zero in Op1 we don't need to
3689     // demand it in Op0 - its guaranteed to be zero.
3690     APInt DemandedElts0 = DemandedElts & ~SrcZero;
3691     if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3692                                    TLO, Depth + 1))
3693       return true;
3694 
3695     KnownUndef &= DemandedElts0;
3696     KnownZero &= DemandedElts0;
3697 
3698     // If every element pair has a zero/undef then just fold to zero.
3699     // fold (and x, undef) -> 0  /  (and x, 0) -> 0
3700     // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
3701     if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3702       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3703 
3704     // If either side has a zero element, then the result element is zero, even
3705     // if the other is an UNDEF.
3706     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3707     // and then handle 'and' nodes with the rest of the binop opcodes.
3708     KnownZero |= SrcZero;
3709     KnownUndef &= SrcUndef;
3710     KnownUndef &= ~KnownZero;
3711 
3712     // Attempt to avoid multi-use ops if we don't need anything from them.
3713     if (!DemandedElts.isAllOnes())
3714       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3715         return true;
3716     break;
3717   }
3718   case ISD::TRUNCATE:
3719   case ISD::SIGN_EXTEND:
3720   case ISD::ZERO_EXTEND:
3721     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3722                                    KnownZero, TLO, Depth + 1))
3723       return true;
3724 
3725     if (!DemandedElts.isAllOnes())
3726       if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3727               Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3728         return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3729 
3730     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3731       // zext(undef) upper bits are guaranteed to be zero.
3732       if (DemandedElts.isSubsetOf(KnownUndef))
3733         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3734       KnownUndef.clearAllBits();
3735     }
3736     break;
3737   case ISD::SINT_TO_FP:
3738   case ISD::UINT_TO_FP:
3739   case ISD::FP_TO_SINT:
3740   case ISD::FP_TO_UINT:
3741     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3742                                    KnownZero, TLO, Depth + 1))
3743       return true;
3744     // Don't fall through to generic undef -> undef handling.
3745     return false;
3746   default: {
3747     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3748       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3749                                                   KnownZero, TLO, Depth))
3750         return true;
3751     } else {
3752       KnownBits Known;
3753       APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3754       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3755                                TLO, Depth, AssumeSingleUse))
3756         return true;
3757     }
3758     break;
3759   }
3760   }
3761   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3762 
3763   // Constant fold all undef cases.
3764   // TODO: Handle zero cases as well.
3765   if (DemandedElts.isSubsetOf(KnownUndef))
3766     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3767 
3768   return false;
3769 }
3770 
3771 /// Determine which of the bits specified in Mask are known to be either zero or
3772 /// one and return them in the Known.
3773 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3774                                                    KnownBits &Known,
3775                                                    const APInt &DemandedElts,
3776                                                    const SelectionDAG &DAG,
3777                                                    unsigned Depth) const {
3778   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3779           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3780           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3781           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3782          "Should use MaskedValueIsZero if you don't know whether Op"
3783          " is a target node!");
3784   Known.resetAll();
3785 }
3786 
3787 void TargetLowering::computeKnownBitsForTargetInstr(
3788     GISelKnownBits &Analysis, Register R, KnownBits &Known,
3789     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3790     unsigned Depth) const {
3791   Known.resetAll();
3792 }
3793 
3794 void TargetLowering::computeKnownBitsForFrameIndex(
3795   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3796   // The low bits are known zero if the pointer is aligned.
3797   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3798 }
3799 
// Conservative default for GlobalISel: assume no alignment information is
// available for a target-specific instruction. Targets override this hook to
// report better alignment for the registers their instructions define.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  return Align(1);
}
3805 
3806 /// This method can be implemented by targets that want to expose additional
3807 /// information about sign bits to the DAG Combiner.
3808 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3809                                                          const APInt &,
3810                                                          const SelectionDAG &,
3811                                                          unsigned Depth) const {
3812   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3813           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3814           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3815           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3816          "Should use ComputeNumSignBits if you don't know whether Op"
3817          " is a target node!");
3818   return 1;
3819 }
3820 
// Conservative default for GlobalISel: only the sign bit itself is known to
// be a sign bit. Targets override this hook to report more sign bits for the
// values their instructions produce.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
3826 
3827 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3828     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3829     TargetLoweringOpt &TLO, unsigned Depth) const {
3830   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3831           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3832           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3833           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3834          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3835          " is a target node!");
3836   return false;
3837 }
3838 
3839 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3840     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3841     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3842   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3843           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3844           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3845           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3846          "Should use SimplifyDemandedBits if you don't know whether Op"
3847          " is a target node!");
3848   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3849   return false;
3850 }
3851 
3852 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3853     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3854     SelectionDAG &DAG, unsigned Depth) const {
3855   assert(
3856       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3857        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3858        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3859        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3860       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3861       " is a target node!");
3862   return SDValue();
3863 }
3864 
3865 SDValue
3866 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3867                                         SDValue N1, MutableArrayRef<int> Mask,
3868                                         SelectionDAG &DAG) const {
3869   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3870   if (!LegalMask) {
3871     std::swap(N0, N1);
3872     ShuffleVectorSDNode::commuteMask(Mask);
3873     LegalMask = isShuffleMaskLegal(Mask, VT);
3874   }
3875 
3876   if (!LegalMask)
3877     return SDValue();
3878 
3879   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3880 }
3881 
// Returns the constant that a target-specific load node is known to read, or
// null when none can be determined. The default implementation recognizes no
// such loads; targets with constant-pool-like loads override this hook.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3885 
3886 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3887     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3888     bool PoisonOnly, unsigned Depth) const {
3889   assert(
3890       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3891        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3892        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3893        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3894       "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3895       " is a target node!");
3896 
3897   // If Op can't create undef/poison and none of its operands are undef/poison
3898   // then Op is never undef/poison.
3899   return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3900                                               /*ConsiderFlags*/ true, Depth) &&
3901          all_of(Op->ops(), [&](SDValue V) {
3902            return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3903                                                        Depth + 1);
3904          });
3905 }
3906 
3907 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3908     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3909     bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3910   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3911           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3912           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3913           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3914          "Should use canCreateUndefOrPoison if you don't know whether Op"
3915          " is a target node!");
3916   // Be conservative and return true.
3917   return true;
3918 }
3919 
3920 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3921                                                   const SelectionDAG &DAG,
3922                                                   bool SNaN,
3923                                                   unsigned Depth) const {
3924   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3925           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3926           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3927           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3928          "Should use isKnownNeverNaN if you don't know whether Op"
3929          " is a target node!");
3930   return false;
3931 }
3932 
3933 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3934                                                const APInt &DemandedElts,
3935                                                APInt &UndefElts,
3936                                                const SelectionDAG &DAG,
3937                                                unsigned Depth) const {
3938   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3939           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3940           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3941           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3942          "Should use isSplatValue if you don't know whether Op"
3943          " is a target node!");
3944   return false;
3945 }
3946 
3947 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3948 // work with truncating build vectors and vectors with elements of less than
3949 // 8 bits.
3950 bool TargetLowering::isConstTrueVal(SDValue N) const {
3951   if (!N)
3952     return false;
3953 
3954   unsigned EltWidth;
3955   APInt CVal;
3956   if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3957                                                /*AllowTruncation=*/true)) {
3958     CVal = CN->getAPIntValue();
3959     EltWidth = N.getValueType().getScalarSizeInBits();
3960   } else
3961     return false;
3962 
3963   // If this is a truncating splat, truncate the splat value.
3964   // Otherwise, we may fail to match the expected values below.
3965   if (EltWidth < CVal.getBitWidth())
3966     CVal = CVal.trunc(EltWidth);
3967 
3968   switch (getBooleanContents(N.getValueType())) {
3969   case UndefinedBooleanContent:
3970     return CVal[0];
3971   case ZeroOrOneBooleanContent:
3972     return CVal.isOne();
3973   case ZeroOrNegativeOneBooleanContent:
3974     return CVal.isAllOnes();
3975   }
3976 
3977   llvm_unreachable("Invalid boolean contents");
3978 }
3979 
3980 bool TargetLowering::isConstFalseVal(SDValue N) const {
3981   if (!N)
3982     return false;
3983 
3984   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3985   if (!CN) {
3986     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3987     if (!BV)
3988       return false;
3989 
3990     // Only interested in constant splats, we don't care about undef
3991     // elements in identifying boolean constants and getConstantSplatNode
3992     // returns NULL if all ops are undef;
3993     CN = BV->getConstantSplatNode();
3994     if (!CN)
3995       return false;
3996   }
3997 
3998   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3999     return !CN->getAPIntValue()[0];
4000 
4001   return CN->isZero();
4002 }
4003 
4004 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4005                                        bool SExt) const {
4006   if (VT == MVT::i1)
4007     return N->isOne();
4008 
4009   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
4010   switch (Cnt) {
4011   case TargetLowering::ZeroOrOneBooleanContent:
4012     // An extended value of 1 is always true, unless its original type is i1,
4013     // in which case it will be sign extended to -1.
4014     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4015   case TargetLowering::UndefinedBooleanContent:
4016   case TargetLowering::ZeroOrNegativeOneBooleanContent:
4017     return N->isAllOnes() && SExt;
4018   }
4019   llvm_unreachable("Unexpected enumeration.");
4020 }
4021 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Only integer eq/ne comparisons are handled. Several independent folds are
/// attempted in turn; the first that matches produces the result, and an
/// empty SDValue is returned if none apply.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize: if only the RHS is an AND, move it to the LHS.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    // The narrow type's width places the tested bit at the sign position.
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // its liable to create and infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
4116 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned range check onto an eq/ne comparison. For the non-strict
  // forms (ule/ugt) canonicalize the constant to the equivalent strict form by
  // adding 1, so that I1 ends up as exactly (1 << KeptBits).
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into:  sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      ISD::SIGN_EXTEND_INREG, DL, XVT, X,
      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
}
4208 
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
//
// Hoists the shifted constant out of the AND by applying the opposite logical
// shift to X instead, which can expose a simpler AND-with-constant compare.
// Only valid for eq/ne comparisons against zero; the target is consulted via
// shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd.
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'.
  // NOTE: X is captured by reference and must be set by the caller before each
  // invocation; the target hook sees it as the other AND operand.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
4277 
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  // The remaining folds require the other comparison operand to match Y.
  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  // Revisit the new shift so it can be combined further (unless we are being
  // called from the legalizer, which manages its own worklist).
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
4318 
/// Try to fold an integer setcc whose LHS is (possibly a truncate of) a
/// one-use CTPOP and whose RHS is the constant \p C1, using "clear the lowest
/// set bit" style tricks that avoid materializing a population count.
/// Returns a null SDValue when no fold applies.
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  // The truncate is value-preserving for a ctpop result when the narrow type
  // still has more bits than log2(source width), i.e. it can represent the
  // maximum possible popcount.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
      return SDValue();

    // Each "x &= x - 1" pass strips one set bit, so testing "popcount u< k+1"
    // (or "u> k") needs k passes. Bail out if that exceeds the target's
    // budget for replacing a custom ctpop.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    // After 'Passes' iterations of x &= (x - 1), Result is zero iff the
    // original value had at most 'Passes' set bits.
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);

    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
    // check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
      return RHS;
    }

    // Without the never-zero guarantee, use the xor form, which also rejects
    // x == 0 (where x-1 is all-ones and the unsigned compare fails):
    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
  }

  return SDValue();
}
4392 
4393 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4394                                    ISD::CondCode Cond, const SDLoc &dl,
4395                                    SelectionDAG &DAG) {
4396   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4397     return SDValue();
4398 
4399   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4400   if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4401     return SDValue();
4402 
4403   auto getRotateSource = [](SDValue X) {
4404     if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4405       return X.getOperand(0);
4406     return SDValue();
4407   };
4408 
4409   // Peek through a rotated value compared against 0 or -1:
4410   // (rot X, Y) == 0/-1 --> X == 0/-1
4411   // (rot X, Y) != 0/-1 --> X != 0/-1
4412   if (SDValue R = getRotateSource(N0))
4413     return DAG.getSetCC(dl, VT, R, N1, Cond);
4414 
4415   // Peek through an 'or' of a rotated value compared against 0:
4416   // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4417   // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4418   //
4419   // TODO: Add the 'and' with -1 sibling.
4420   // TODO: Recurse through a series of 'or' ops to find the rotate.
4421   EVT OpVT = N0.getValueType();
4422   if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4423     if (SDValue R = getRotateSource(N0.getOperand(0))) {
4424       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4425       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4426     }
4427     if (SDValue R = getRotateSource(N0.getOperand(1))) {
4428       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4429       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4430     }
4431   }
4432 
4433   return SDValue();
4434 }
4435 
/// Try to simplify "(fshl/fshr (or X, Y), X, C) ==/!= 0" (in either funnel
/// operand order) into a test that uses one ordinary shift instead of the
/// funnel shift. Returns a null SDValue when no fold applies.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  // The comparison constant must be zero (splat allowed, undef elts allowed).
  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  // Only handle a one-use funnel shift on the LHS of the compare.
  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  // The shift amount must be a constant strictly less than the bit width.
  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  unsigned ShAmt = ShAmtC->getZExtValue();
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  // On success, X is bound to the operand equal to 'Other' and Y to the
  // remaining 'or' operand.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(0) == Other) {
      X = Or.getOperand(0);
      Y = Or.getOperand(1);
      return true;
    }
    if (Or.getOperand(1) == Other) {
      X = Or.getOperand(1);
      Y = Or.getOperand(0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(2).getValueType();
  SDValue F0 = N0.getOperand(0);
  SDValue F1 = N0.getOperand(1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }

  return SDValue();
}
4501 
4502 /// Try to simplify a setcc built with the specified operands and cc. If it is
4503 /// unable to simplify it, return a null SDValue.
4504 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4505                                       ISD::CondCode Cond, bool foldBooleans,
4506                                       DAGCombinerInfo &DCI,
4507                                       const SDLoc &dl) const {
4508   SelectionDAG &DAG = DCI.DAG;
4509   const DataLayout &Layout = DAG.getDataLayout();
4510   EVT OpVT = N0.getValueType();
4511   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4512 
4513   // Constant fold or commute setcc.
4514   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4515     return Fold;
4516 
4517   bool N0ConstOrSplat =
4518       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4519   bool N1ConstOrSplat =
4520       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4521 
4522   // Canonicalize toward having the constant on the RHS.
4523   // TODO: Handle non-splat vector constants. All undef causes trouble.
4524   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4525   // infinite loop here when we encounter one.
4526   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4527   if (N0ConstOrSplat && !N1ConstOrSplat &&
4528       (DCI.isBeforeLegalizeOps() ||
4529        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4530     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4531 
4532   // If we have a subtract with the same 2 non-constant operands as this setcc
4533   // -- but in reverse order -- then try to commute the operands of this setcc
4534   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4535   // instruction on some targets.
4536   if (!N0ConstOrSplat && !N1ConstOrSplat &&
4537       (DCI.isBeforeLegalizeOps() ||
4538        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4539       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4540       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4541     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4542 
4543   if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4544     return V;
4545 
4546   if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4547     return V;
4548 
4549   if (auto *N1C = isConstOrConstSplat(N1)) {
4550     const APInt &C1 = N1C->getAPIntValue();
4551 
4552     // Optimize some CTPOP cases.
4553     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4554       return V;
4555 
4556     // For equality to 0 of a no-wrap multiply, decompose and test each op:
4557     // X * Y == 0 --> (X == 0) || (Y == 0)
4558     // X * Y != 0 --> (X != 0) && (Y != 0)
4559     // TODO: This bails out if minsize is set, but if the target doesn't have a
4560     //       single instruction multiply for this type, it would likely be
4561     //       smaller to decompose.
4562     if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4563         N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4564         (N0->getFlags().hasNoUnsignedWrap() ||
4565          N0->getFlags().hasNoSignedWrap()) &&
4566         !Attr.hasFnAttr(Attribute::MinSize)) {
4567       SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4568       SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4569       unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4570       return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4571     }
4572 
4573     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4574     // equality comparison, then we're just comparing whether X itself is
4575     // zero.
4576     if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4577         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4578         llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4579       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4580         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4581             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4582           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4583             // (srl (ctlz x), 5) == 0  -> X != 0
4584             // (srl (ctlz x), 5) != 1  -> X != 0
4585             Cond = ISD::SETNE;
4586           } else {
4587             // (srl (ctlz x), 5) != 0  -> X == 0
4588             // (srl (ctlz x), 5) == 1  -> X == 0
4589             Cond = ISD::SETEQ;
4590           }
4591           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4592           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4593                               Cond);
4594         }
4595       }
4596     }
4597   }
4598 
4599   // FIXME: Support vectors.
4600   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4601     const APInt &C1 = N1C->getAPIntValue();
4602 
4603     // (zext x) == C --> x == (trunc C)
4604     // (sext x) == C --> x == (trunc C)
4605     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4606         DCI.isBeforeLegalize() && N0->hasOneUse()) {
4607       unsigned MinBits = N0.getValueSizeInBits();
4608       SDValue PreExt;
4609       bool Signed = false;
4610       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4611         // ZExt
4612         MinBits = N0->getOperand(0).getValueSizeInBits();
4613         PreExt = N0->getOperand(0);
4614       } else if (N0->getOpcode() == ISD::AND) {
4615         // DAGCombine turns costly ZExts into ANDs
4616         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4617           if ((C->getAPIntValue()+1).isPowerOf2()) {
4618             MinBits = C->getAPIntValue().countr_one();
4619             PreExt = N0->getOperand(0);
4620           }
4621       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4622         // SExt
4623         MinBits = N0->getOperand(0).getValueSizeInBits();
4624         PreExt = N0->getOperand(0);
4625         Signed = true;
4626       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4627         // ZEXTLOAD / SEXTLOAD
4628         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4629           MinBits = LN0->getMemoryVT().getSizeInBits();
4630           PreExt = N0;
4631         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4632           Signed = true;
4633           MinBits = LN0->getMemoryVT().getSizeInBits();
4634           PreExt = N0;
4635         }
4636       }
4637 
4638       // Figure out how many bits we need to preserve this constant.
4639       unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4640 
4641       // Make sure we're not losing bits from the constant.
4642       if (MinBits > 0 &&
4643           MinBits < C1.getBitWidth() &&
4644           MinBits >= ReqdBits) {
4645         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4646         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4647           // Will get folded away.
4648           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4649           if (MinBits == 1 && C1 == 1)
4650             // Invert the condition.
4651             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4652                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4653           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4654           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4655         }
4656 
4657         // If truncating the setcc operands is not desirable, we can still
4658         // simplify the expression in some cases:
4659         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4660         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4661         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4662         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4663         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4664         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4665         SDValue TopSetCC = N0->getOperand(0);
4666         unsigned N0Opc = N0->getOpcode();
4667         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4668         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4669             TopSetCC.getOpcode() == ISD::SETCC &&
4670             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4671             (isConstFalseVal(N1) ||
4672              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4673 
4674           bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4675                          (!N1C->isZero() && Cond == ISD::SETNE);
4676 
4677           if (!Inverse)
4678             return TopSetCC;
4679 
4680           ISD::CondCode InvCond = ISD::getSetCCInverse(
4681               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4682               TopSetCC.getOperand(0).getValueType());
4683           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4684                                       TopSetCC.getOperand(1),
4685                                       InvCond);
4686         }
4687       }
4688     }
4689 
4690     // If the LHS is '(and load, const)', the RHS is 0, the test is for
4691     // equality or unsigned, and all 1 bits of the const are in the same
4692     // partial word, see if we can shorten the load.
4693     if (DCI.isBeforeLegalize() &&
4694         !ISD::isSignedIntSetCC(Cond) &&
4695         N0.getOpcode() == ISD::AND && C1 == 0 &&
4696         N0.getNode()->hasOneUse() &&
4697         isa<LoadSDNode>(N0.getOperand(0)) &&
4698         N0.getOperand(0).getNode()->hasOneUse() &&
4699         isa<ConstantSDNode>(N0.getOperand(1))) {
4700       auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4701       APInt bestMask;
4702       unsigned bestWidth = 0, bestOffset = 0;
4703       if (Lod->isSimple() && Lod->isUnindexed() &&
4704           (Lod->getMemoryVT().isByteSized() ||
4705            isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4706         unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4707         unsigned origWidth = N0.getValueSizeInBits();
4708         unsigned maskWidth = origWidth;
4709         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4710         // 8 bits, but have to be careful...
4711         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4712           origWidth = Lod->getMemoryVT().getSizeInBits();
4713         const APInt &Mask = N0.getConstantOperandAPInt(1);
4714         // Only consider power-of-2 widths (and at least one byte) as candiates
4715         // for the narrowed load.
4716         for (unsigned width = 8; width < origWidth; width *= 2) {
4717           EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4718           if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4719             continue;
4720           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4721           // Avoid accessing any padding here for now (we could use memWidth
4722           // instead of origWidth here otherwise).
4723           unsigned maxOffset = origWidth - width;
4724           for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4725             if (Mask.isSubsetOf(newMask)) {
4726               unsigned ptrOffset =
4727                   Layout.isLittleEndian() ? offset : memWidth - width - offset;
4728               unsigned IsFast = 0;
4729               Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4730               if (allowsMemoryAccess(
4731                       *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4732                       NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4733                   IsFast) {
4734                 bestOffset = ptrOffset / 8;
4735                 bestMask = Mask.lshr(offset);
4736                 bestWidth = width;
4737                 break;
4738               }
4739             }
4740             newMask <<= 8;
4741           }
4742           if (bestWidth)
4743             break;
4744         }
4745       }
4746       if (bestWidth) {
4747         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4748         SDValue Ptr = Lod->getBasePtr();
4749         if (bestOffset != 0)
4750           Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4751         SDValue NewLoad =
4752             DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4753                         Lod->getPointerInfo().getWithOffset(bestOffset),
4754                         Lod->getOriginalAlign());
4755         SDValue And =
4756             DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4757                         DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4758         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4759       }
4760     }
4761 
4762     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4763     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4764       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4765 
4766       // If the comparison constant has bits in the upper part, the
4767       // zero-extended value could never match.
4768       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4769                                               C1.getBitWidth() - InSize))) {
4770         switch (Cond) {
4771         case ISD::SETUGT:
4772         case ISD::SETUGE:
4773         case ISD::SETEQ:
4774           return DAG.getConstant(0, dl, VT);
4775         case ISD::SETULT:
4776         case ISD::SETULE:
4777         case ISD::SETNE:
4778           return DAG.getConstant(1, dl, VT);
4779         case ISD::SETGT:
4780         case ISD::SETGE:
4781           // True if the sign bit of C1 is set.
4782           return DAG.getConstant(C1.isNegative(), dl, VT);
4783         case ISD::SETLT:
4784         case ISD::SETLE:
4785           // True if the sign bit of C1 isn't set.
4786           return DAG.getConstant(C1.isNonNegative(), dl, VT);
4787         default:
4788           break;
4789         }
4790       }
4791 
4792       // Otherwise, we can perform the comparison with the low bits.
4793       switch (Cond) {
4794       case ISD::SETEQ:
4795       case ISD::SETNE:
4796       case ISD::SETUGT:
4797       case ISD::SETUGE:
4798       case ISD::SETULT:
4799       case ISD::SETULE: {
4800         EVT newVT = N0.getOperand(0).getValueType();
4801         // FIXME: Should use isNarrowingProfitable.
4802         if (DCI.isBeforeLegalizeOps() ||
4803             (isOperationLegal(ISD::SETCC, newVT) &&
4804              isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4805              isTypeDesirableForOp(ISD::SETCC, newVT))) {
4806           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4807           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4808 
4809           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4810                                           NewConst, Cond);
4811           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4812         }
4813         break;
4814       }
4815       default:
4816         break; // todo, be more careful with signed comparisons
4817       }
4818     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4819                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4820                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4821                                       OpVT)) {
4822       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4823       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4824       EVT ExtDstTy = N0.getValueType();
4825       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4826 
4827       // If the constant doesn't fit into the number of bits for the source of
4828       // the sign extension, it is impossible for both sides to be equal.
4829       if (C1.getSignificantBits() > ExtSrcTyBits)
4830         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4831 
4832       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4833              ExtDstTy != ExtSrcTy && "Unexpected types!");
4834       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4835       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4836                                    DAG.getConstant(Imm, dl, ExtDstTy));
4837       if (!DCI.isCalledByLegalizer())
4838         DCI.AddToWorklist(ZextOp.getNode());
4839       // Otherwise, make this a use of a zext.
4840       return DAG.getSetCC(dl, VT, ZextOp,
4841                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4842     } else if ((N1C->isZero() || N1C->isOne()) &&
4843                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4844       // SETCC (X), [0|1], [EQ|NE]  -> X if X is known 0/1. i1 types are
4845       // excluded as they are handled below whilst checking for foldBooleans.
4846       if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4847           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4848           (N0.getValueType() == MVT::i1 ||
4849            getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4850           DAG.MaskedValueIsZero(
4851               N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4852         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4853         if (TrueWhenTrue)
4854           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4855         // Invert the condition.
4856         if (N0.getOpcode() == ISD::SETCC) {
4857           ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4858           CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4859           if (DCI.isBeforeLegalizeOps() ||
4860               isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4861             return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4862         }
4863       }
4864 
4865       if ((N0.getOpcode() == ISD::XOR ||
4866            (N0.getOpcode() == ISD::AND &&
4867             N0.getOperand(0).getOpcode() == ISD::XOR &&
4868             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4869           isOneConstant(N0.getOperand(1))) {
4870         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4871         // can only do this if the top bits are known zero.
4872         unsigned BitWidth = N0.getValueSizeInBits();
4873         if (DAG.MaskedValueIsZero(N0,
4874                                   APInt::getHighBitsSet(BitWidth,
4875                                                         BitWidth-1))) {
4876           // Okay, get the un-inverted input value.
4877           SDValue Val;
4878           if (N0.getOpcode() == ISD::XOR) {
4879             Val = N0.getOperand(0);
4880           } else {
4881             assert(N0.getOpcode() == ISD::AND &&
4882                     N0.getOperand(0).getOpcode() == ISD::XOR);
4883             // ((X^1)&1)^1 -> X & 1
4884             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4885                               N0.getOperand(0).getOperand(0),
4886                               N0.getOperand(1));
4887           }
4888 
4889           return DAG.getSetCC(dl, VT, Val, N1,
4890                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4891         }
4892       } else if (N1C->isOne()) {
4893         SDValue Op0 = N0;
4894         if (Op0.getOpcode() == ISD::TRUNCATE)
4895           Op0 = Op0.getOperand(0);
4896 
4897         if ((Op0.getOpcode() == ISD::XOR) &&
4898             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4899             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4900           SDValue XorLHS = Op0.getOperand(0);
4901           SDValue XorRHS = Op0.getOperand(1);
4902           // Ensure that the input setccs return an i1 type or 0/1 value.
4903           if (Op0.getValueType() == MVT::i1 ||
4904               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4905                       ZeroOrOneBooleanContent &&
4906                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4907                         ZeroOrOneBooleanContent)) {
4908             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4909             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4910             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4911           }
4912         }
4913         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4914           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4915           if (Op0.getValueType().bitsGT(VT))
4916             Op0 = DAG.getNode(ISD::AND, dl, VT,
4917                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4918                           DAG.getConstant(1, dl, VT));
4919           else if (Op0.getValueType().bitsLT(VT))
4920             Op0 = DAG.getNode(ISD::AND, dl, VT,
4921                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4922                         DAG.getConstant(1, dl, VT));
4923 
4924           return DAG.getSetCC(dl, VT, Op0,
4925                               DAG.getConstant(0, dl, Op0.getValueType()),
4926                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4927         }
4928         if (Op0.getOpcode() == ISD::AssertZext &&
4929             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4930           return DAG.getSetCC(dl, VT, Op0,
4931                               DAG.getConstant(0, dl, Op0.getValueType()),
4932                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4933       }
4934     }
4935 
4936     // Given:
4937     //   icmp eq/ne (urem %x, %y), 0
4938     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4939     //   icmp eq/ne %x, 0
4940     if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4941         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4942       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4943       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4944       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4945         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4946     }
4947 
4948     // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4949     //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4950     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4951         N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4952         N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4953         N1C->isAllOnes()) {
4954       return DAG.getSetCC(dl, VT, N0.getOperand(0),
4955                           DAG.getConstant(0, dl, OpVT),
4956                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4957     }
4958 
4959     if (SDValue V =
4960             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4961       return V;
4962   }
4963 
4964   // These simplifications apply to splat vectors as well.
4965   // TODO: Handle more splat vector cases.
4966   if (auto *N1C = isConstOrConstSplat(N1)) {
4967     const APInt &C1 = N1C->getAPIntValue();
4968 
4969     APInt MinVal, MaxVal;
4970     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4971     if (ISD::isSignedIntSetCC(Cond)) {
4972       MinVal = APInt::getSignedMinValue(OperandBitSize);
4973       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4974     } else {
4975       MinVal = APInt::getMinValue(OperandBitSize);
4976       MaxVal = APInt::getMaxValue(OperandBitSize);
4977     }
4978 
4979     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4980     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4981       // X >= MIN --> true
4982       if (C1 == MinVal)
4983         return DAG.getBoolConstant(true, dl, VT, OpVT);
4984 
4985       if (!VT.isVector()) { // TODO: Support this for vectors.
4986         // X >= C0 --> X > (C0 - 1)
4987         APInt C = C1 - 1;
4988         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4989         if ((DCI.isBeforeLegalizeOps() ||
4990              isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
4991             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4992                                   isLegalICmpImmediate(C.getSExtValue())))) {
4993           return DAG.getSetCC(dl, VT, N0,
4994                               DAG.getConstant(C, dl, N1.getValueType()),
4995                               NewCC);
4996         }
4997       }
4998     }
4999 
5000     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5001       // X <= MAX --> true
5002       if (C1 == MaxVal)
5003         return DAG.getBoolConstant(true, dl, VT, OpVT);
5004 
5005       // X <= C0 --> X < (C0 + 1)
5006       if (!VT.isVector()) { // TODO: Support this for vectors.
5007         APInt C = C1 + 1;
5008         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5009         if ((DCI.isBeforeLegalizeOps() ||
5010              isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5011             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5012                                   isLegalICmpImmediate(C.getSExtValue())))) {
5013           return DAG.getSetCC(dl, VT, N0,
5014                               DAG.getConstant(C, dl, N1.getValueType()),
5015                               NewCC);
5016         }
5017       }
5018     }
5019 
5020     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5021       if (C1 == MinVal)
5022         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5023 
5024       // TODO: Support this for vectors after legalize ops.
5025       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5026         // Canonicalize setlt X, Max --> setne X, Max
5027         if (C1 == MaxVal)
5028           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5029 
5030         // If we have setult X, 1, turn it into seteq X, 0
5031         if (C1 == MinVal+1)
5032           return DAG.getSetCC(dl, VT, N0,
5033                               DAG.getConstant(MinVal, dl, N0.getValueType()),
5034                               ISD::SETEQ);
5035       }
5036     }
5037 
5038     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5039       if (C1 == MaxVal)
5040         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5041 
5042       // TODO: Support this for vectors after legalize ops.
5043       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5044         // Canonicalize setgt X, Min --> setne X, Min
5045         if (C1 == MinVal)
5046           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5047 
5048         // If we have setugt X, Max-1, turn it into seteq X, Max
5049         if (C1 == MaxVal-1)
5050           return DAG.getSetCC(dl, VT, N0,
5051                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
5052                               ISD::SETEQ);
5053       }
5054     }
5055 
5056     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5057       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
5058       if (C1.isZero())
5059         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5060                 VT, N0, N1, Cond, DCI, dl))
5061           return CC;
5062 
5063       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5064       // For example, when high 32-bits of i64 X are known clear:
5065       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
5066       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
5067       bool CmpZero = N1C->isZero();
5068       bool CmpNegOne = N1C->isAllOnes();
5069       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5070         // Match or(lo,shl(hi,bw/2)) pattern.
5071         auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5072           unsigned EltBits = V.getScalarValueSizeInBits();
5073           if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5074             return false;
5075           SDValue LHS = V.getOperand(0);
5076           SDValue RHS = V.getOperand(1);
5077           APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5078           // Unshifted element must have zero upperbits.
5079           if (RHS.getOpcode() == ISD::SHL &&
5080               isa<ConstantSDNode>(RHS.getOperand(1)) &&
5081               RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5082               DAG.MaskedValueIsZero(LHS, HiBits)) {
5083             Lo = LHS;
5084             Hi = RHS.getOperand(0);
5085             return true;
5086           }
5087           if (LHS.getOpcode() == ISD::SHL &&
5088               isa<ConstantSDNode>(LHS.getOperand(1)) &&
5089               LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5090               DAG.MaskedValueIsZero(RHS, HiBits)) {
5091             Lo = RHS;
5092             Hi = LHS.getOperand(0);
5093             return true;
5094           }
5095           return false;
5096         };
5097 
5098         auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5099           unsigned EltBits = N0.getScalarValueSizeInBits();
5100           unsigned HalfBits = EltBits / 2;
5101           APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5102           SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5103           SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5104           SDValue NewN0 =
5105               DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5106           SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5107           return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5108         };
5109 
5110         SDValue Lo, Hi;
5111         if (IsConcat(N0, Lo, Hi))
5112           return MergeConcat(Lo, Hi);
5113 
5114         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5115           SDValue Lo0, Lo1, Hi0, Hi1;
5116           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5117               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5118             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5119                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5120           }
5121         }
5122       }
5123     }
5124 
5125     // If we have "setcc X, C0", check to see if we can shrink the immediate
5126     // by changing cc.
5127     // TODO: Support this for vectors after legalize ops.
5128     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5129       // SETUGT X, SINTMAX  -> SETLT X, 0
5130       // SETUGE X, SINTMIN -> SETLT X, 0
5131       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5132           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5133         return DAG.getSetCC(dl, VT, N0,
5134                             DAG.getConstant(0, dl, N1.getValueType()),
5135                             ISD::SETLT);
5136 
5137       // SETULT X, SINTMIN  -> SETGT X, -1
5138       // SETULE X, SINTMAX  -> SETGT X, -1
5139       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5140           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5141         return DAG.getSetCC(dl, VT, N0,
5142                             DAG.getAllOnesConstant(dl, N1.getValueType()),
5143                             ISD::SETGT);
5144     }
5145   }
5146 
5147   // Back to non-vector simplifications.
5148   // TODO: Can we do these for vector splats?
5149   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5150     const APInt &C1 = N1C->getAPIntValue();
5151     EVT ShValTy = N0.getValueType();
5152 
5153     // Fold bit comparisons when we can. This will result in an
5154     // incorrect value when boolean false is negative one, unless
5155     // the bitsize is 1 in which case the false value is the same
5156     // in practice regardless of the representation.
5157     if ((VT.getSizeInBits() == 1 ||
5158          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5159         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5160         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5161         N0.getOpcode() == ISD::AND) {
5162       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5163         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
5164           // Perform the xform if the AND RHS is a single bit.
5165           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5166           if (AndRHS->getAPIntValue().isPowerOf2() &&
5167               !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5168             return DAG.getNode(
5169                 ISD::TRUNCATE, dl, VT,
5170                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5171                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5172           }
5173         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5174           // (X & 8) == 8  -->  (X & 8) >> 3
5175           // Perform the xform if C1 is a single bit.
5176           unsigned ShCt = C1.logBase2();
5177           if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5178             return DAG.getNode(
5179                 ISD::TRUNCATE, dl, VT,
5180                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5181                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5182           }
5183         }
5184       }
5185     }
5186 
5187     if (C1.getSignificantBits() <= 64 &&
5188         !isLegalICmpImmediate(C1.getSExtValue())) {
5189       // (X & -256) == 256 -> (X >> 8) == 1
5190       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5191           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5192         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5193           const APInt &AndRHSC = AndRHS->getAPIntValue();
5194           if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5195             unsigned ShiftBits = AndRHSC.countr_zero();
5196             if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5197               SDValue Shift = DAG.getNode(
5198                   ISD::SRL, dl, ShValTy, N0.getOperand(0),
5199                   DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5200               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5201               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5202             }
5203           }
5204         }
5205       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5206                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5207         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5208         // X <  0x100000000 -> (X >> 32) <  1
5209         // X >= 0x100000000 -> (X >> 32) >= 1
5210         // X <= 0x0ffffffff -> (X >> 32) <  1
5211         // X >  0x0ffffffff -> (X >> 32) >= 1
5212         unsigned ShiftBits;
5213         APInt NewC = C1;
5214         ISD::CondCode NewCond = Cond;
5215         if (AdjOne) {
5216           ShiftBits = C1.countr_one();
5217           NewC = NewC + 1;
5218           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5219         } else {
5220           ShiftBits = C1.countr_zero();
5221         }
5222         NewC.lshrInPlace(ShiftBits);
5223         if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5224             isLegalICmpImmediate(NewC.getSExtValue()) &&
5225             !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5226           SDValue Shift =
5227               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5228                           DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5229           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5230           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5231         }
5232       }
5233     }
5234   }
5235 
5236   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5237     auto *CFP = cast<ConstantFPSDNode>(N1);
5238     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5239 
5240     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
5241     // constant if knowing that the operand is non-nan is enough.  We prefer to
5242     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5243     // materialize 0.0.
5244     if (Cond == ISD::SETO || Cond == ISD::SETUO)
5245       return DAG.getSetCC(dl, VT, N0, N0, Cond);
5246 
5247     // setcc (fneg x), C -> setcc swap(pred) x, -C
5248     if (N0.getOpcode() == ISD::FNEG) {
5249       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5250       if (DCI.isBeforeLegalizeOps() ||
5251           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5252         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5253         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5254       }
5255     }
5256 
5257     // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5258     if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5259         !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5260       bool IsFabs = N0.getOpcode() == ISD::FABS;
5261       SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5262       if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5263         FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5264                                              : (IsFabs ? fcInf : fcPosInf);
5265         if (Cond == ISD::SETUEQ)
5266           Flag |= fcNan;
5267         return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5268                            DAG.getTargetConstant(Flag, dl, MVT::i32));
5269       }
5270     }
5271 
5272     // If the condition is not legal, see if we can find an equivalent one
5273     // which is legal.
5274     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5275       // If the comparison was an awkward floating-point == or != and one of
5276       // the comparison operands is infinity or negative infinity, convert the
5277       // condition to a less-awkward <= or >=.
5278       if (CFP->getValueAPF().isInfinity()) {
5279         bool IsNegInf = CFP->getValueAPF().isNegative();
5280         ISD::CondCode NewCond = ISD::SETCC_INVALID;
5281         switch (Cond) {
5282         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5283         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5284         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5285         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5286         default: break;
5287         }
5288         if (NewCond != ISD::SETCC_INVALID &&
5289             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5290           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5291       }
5292     }
5293   }
5294 
5295   if (N0 == N1) {
5296     // The sext(setcc()) => setcc() optimization relies on the appropriate
5297     // constant being emitted.
5298     assert(!N0.getValueType().isInteger() &&
5299            "Integer types should be handled by FoldSetCC");
5300 
5301     bool EqTrue = ISD::isTrueWhenEqual(Cond);
5302     unsigned UOF = ISD::getUnorderedFlavor(Cond);
5303     if (UOF == 2) // FP operators that are undefined on NaNs.
5304       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5305     if (UOF == unsigned(EqTrue))
5306       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5307     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
5308     // if it is not already.
5309     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5310     if (NewCond != Cond &&
5311         (DCI.isBeforeLegalizeOps() ||
5312                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5313       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5314   }
5315 
5316   // ~X > ~Y --> Y > X
5317   // ~X < ~Y --> Y < X
5318   // ~X < C --> X > ~C
5319   // ~X > C --> X < ~C
5320   if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5321       N0.getValueType().isInteger()) {
5322     if (isBitwiseNot(N0)) {
5323       if (isBitwiseNot(N1))
5324         return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5325 
5326       if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5327           !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5328         SDValue Not = DAG.getNOT(dl, N1, OpVT);
5329         return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5330       }
5331     }
5332   }
5333 
5334   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5335       N0.getValueType().isInteger()) {
5336     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5337         N0.getOpcode() == ISD::XOR) {
5338       // Simplify (X+Y) == (X+Z) -->  Y == Z
5339       if (N0.getOpcode() == N1.getOpcode()) {
5340         if (N0.getOperand(0) == N1.getOperand(0))
5341           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5342         if (N0.getOperand(1) == N1.getOperand(1))
5343           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5344         if (isCommutativeBinOp(N0.getOpcode())) {
5345           // If X op Y == Y op X, try other combinations.
5346           if (N0.getOperand(0) == N1.getOperand(1))
5347             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5348                                 Cond);
5349           if (N0.getOperand(1) == N1.getOperand(0))
5350             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5351                                 Cond);
5352         }
5353       }
5354 
5355       // If RHS is a legal immediate value for a compare instruction, we need
5356       // to be careful about increasing register pressure needlessly.
5357       bool LegalRHSImm = false;
5358 
5359       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5360         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5361           // Turn (X+C1) == C2 --> X == C2-C1
5362           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5363             return DAG.getSetCC(
5364                 dl, VT, N0.getOperand(0),
5365                 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5366                                 dl, N0.getValueType()),
5367                 Cond);
5368 
5369           // Turn (X^C1) == C2 --> X == C1^C2
5370           if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5371             return DAG.getSetCC(
5372                 dl, VT, N0.getOperand(0),
5373                 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5374                                 dl, N0.getValueType()),
5375                 Cond);
5376         }
5377 
5378         // Turn (C1-X) == C2 --> X == C1-C2
5379         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5380           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5381             return DAG.getSetCC(
5382                 dl, VT, N0.getOperand(1),
5383                 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5384                                 dl, N0.getValueType()),
5385                 Cond);
5386 
5387         // Could RHSC fold directly into a compare?
5388         if (RHSC->getValueType(0).getSizeInBits() <= 64)
5389           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5390       }
5391 
5392       // (X+Y) == X --> Y == 0 and similar folds.
5393       // Don't do this if X is an immediate that can fold into a cmp
5394       // instruction and X+Y has other uses. It could be an induction variable
5395       // chain, and the transform would increase register pressure.
5396       if (!LegalRHSImm || N0.hasOneUse())
5397         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5398           return V;
5399     }
5400 
5401     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5402         N1.getOpcode() == ISD::XOR)
5403       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5404         return V;
5405 
5406     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5407       return V;
5408   }
5409 
5410   // Fold remainder of division by a constant.
5411   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5412       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5413     // When division is cheap or optimizing for minimum size,
5414     // fall through to DIVREM creation by skipping this fold.
5415     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5416       if (N0.getOpcode() == ISD::UREM) {
5417         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5418           return Folded;
5419       } else if (N0.getOpcode() == ISD::SREM) {
5420         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5421           return Folded;
5422       }
5423     }
5424   }
5425 
5426   // Fold away ALL boolean setcc's.
5427   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5428     SDValue Temp;
5429     switch (Cond) {
5430     default: llvm_unreachable("Unknown integer setcc!");
5431     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
5432       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5433       N0 = DAG.getNOT(dl, Temp, OpVT);
5434       if (!DCI.isCalledByLegalizer())
5435         DCI.AddToWorklist(Temp.getNode());
5436       break;
5437     case ISD::SETNE:  // X != Y   -->  (X^Y)
5438       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5439       break;
5440     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
5441     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
5442       Temp = DAG.getNOT(dl, N0, OpVT);
5443       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5444       if (!DCI.isCalledByLegalizer())
5445         DCI.AddToWorklist(Temp.getNode());
5446       break;
5447     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
5448     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
5449       Temp = DAG.getNOT(dl, N1, OpVT);
5450       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5451       if (!DCI.isCalledByLegalizer())
5452         DCI.AddToWorklist(Temp.getNode());
5453       break;
5454     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
5455     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
5456       Temp = DAG.getNOT(dl, N0, OpVT);
5457       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5458       if (!DCI.isCalledByLegalizer())
5459         DCI.AddToWorklist(Temp.getNode());
5460       break;
5461     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
5462     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
5463       Temp = DAG.getNOT(dl, N1, OpVT);
5464       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5465       break;
5466     }
5467     if (VT.getScalarType() != MVT::i1) {
5468       if (!DCI.isCalledByLegalizer())
5469         DCI.AddToWorklist(N0.getNode());
5470       // FIXME: If running after legalize, we probably can't do this.
5471       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5472       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5473     }
5474     return N0;
5475   }
5476 
5477   // Could not fold it.
5478   return SDValue();
5479 }
5480 
5481 /// Returns true (and the GlobalValue and the offset) if the node is a
5482 /// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
                                    int64_t &Offset) const {

  // Look through any target-specific address wrapper nodes first.
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();

  // Base case: the node is a global address itself. Offsets accumulate into
  // Offset, so the caller is expected to initialize it before the first call.
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  // Recursive case: (GA + offset) + constant, with the global-address
  // subexpression on either side of the ADD.
  // NOTE(review): if the recursive match succeeds but the sibling operand is
  // not a constant, GA/Offset have already been updated even though we return
  // false — callers should treat the outputs as meaningful only on true.
  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}
5512 
5513 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5514                                           DAGCombinerInfo &DCI) const {
5515   // Default implementation: no optimization.
5516   return SDValue();
5517 }
5518 
5519 //===----------------------------------------------------------------------===//
5520 //  Inline Assembler Implementation Methods
5521 //===----------------------------------------------------------------------===//
5522 
5523 TargetLowering::ConstraintType
5524 TargetLowering::getConstraintType(StringRef Constraint) const {
5525   unsigned S = Constraint.size();
5526 
5527   if (S == 1) {
5528     switch (Constraint[0]) {
5529     default: break;
5530     case 'r':
5531       return C_RegisterClass;
5532     case 'm': // memory
5533     case 'o': // offsetable
5534     case 'V': // not offsetable
5535       return C_Memory;
5536     case 'p': // Address.
5537       return C_Address;
5538     case 'n': // Simple Integer
5539     case 'E': // Floating Point Constant
5540     case 'F': // Floating Point Constant
5541       return C_Immediate;
5542     case 'i': // Simple Integer or Relocatable Constant
5543     case 's': // Relocatable Constant
5544     case 'X': // Allow ANY value.
5545     case 'I': // Target registers.
5546     case 'J':
5547     case 'K':
5548     case 'L':
5549     case 'M':
5550     case 'N':
5551     case 'O':
5552     case 'P':
5553     case '<':
5554     case '>':
5555       return C_Other;
5556     }
5557   }
5558 
5559   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5560     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5561       return C_Memory;
5562     return C_Register;
5563   }
5564   return C_Unknown;
5565 }
5566 
5567 /// Try to replace an X constraint, which matches anything, with another that
5568 /// has more specific requirements based on the type of the corresponding
5569 /// operand.
5570 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5571   if (ConstraintVT.isInteger())
5572     return "r";
5573   if (ConstraintVT.isFloatingPoint())
5574     return "f"; // works for many targets
5575   return nullptr;
5576 }
5577 
5578 SDValue TargetLowering::LowerAsmOutputForConstraint(
5579     SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5580     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5581   return SDValue();
5582 }
5583 
5584 /// Lower the specified operand into the Ops vector.
5585 /// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled generically here; multi-letter
  // constraints are the target's responsibility.
  if (Constraint.size() > 1)
    return;

  // NOTE(review): assumes Constraint is non-empty; an empty StringRef would
  // assert inside operator[] — confirm callers never pass "".
  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A plain integer constant satisfies 'X', 'i' and 'n', but not 's',
      // which requires a relocatable symbol.
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        // i1 values are extended according to the target's boolean contents;
        // everything else is sign-extended to match gcc's behavior.
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic operands satisfy 'X', 'i' and 's', but not 'n', which
      // requires a plain integer.
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          // Fold any offset accumulated so far into the target global address.
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one ADD/SUB-with-constant layer, folding the constant into
      // Offset, and keep walking toward the symbol at the bottom.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Unrecognized node shape: operand is invalid for this constraint, so
      // add nothing to Ops.
      return;
    }
    break;
  }
  }
}
5662 
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
  // Default: targets append no extra operands for their intrinsic calls;
  // override this hook to add target-specific operands to Ops.
}
5666 
5667 std::pair<unsigned, const TargetRegisterClass *>
5668 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5669                                              StringRef Constraint,
5670                                              MVT VT) const {
5671   if (!Constraint.starts_with("{"))
5672     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5673   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5674 
5675   // Remove the braces from around the name.
5676   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5677 
5678   std::pair<unsigned, const TargetRegisterClass *> R =
5679       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5680 
5681   // Figure out which register class contains this reg.
5682   for (const TargetRegisterClass *RC : RI->regclasses()) {
5683     // If none of the value types for this register class are valid, we
5684     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5685     if (!isLegalRC(*RI, *RC))
5686       continue;
5687 
5688     for (const MCPhysReg &PR : *RC) {
5689       if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5690         std::pair<unsigned, const TargetRegisterClass *> S =
5691             std::make_pair(PR, RC);
5692 
5693         // If this register class has the requested value type, return it,
5694         // otherwise keep searching and return the first class found
5695         // if no other is found which explicitly has the requested type.
5696         if (RI->isTypeLegalForClass(*RC, VT))
5697           return S;
5698         if (!R.second)
5699           R = S;
5700       }
5701     }
5702   }
5703 
5704   return R;
5705 }
5706 
5707 //===----------------------------------------------------------------------===//
5708 // Constraint Selection.
5709 
5710 /// Return true of this is an input operand that is a matching constraint like
5711 /// "4".
5712 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5713   assert(!ConstraintCode.empty() && "No known constraint!");
5714   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5715 }
5716 
/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  // A matching constraint is spelled as the decimal number of the tied output
  // operand (e.g. "4"); parse the whole code as that number.
  return atoi(ConstraintCode.c_str());
}
5723 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple direct outputs come back as a struct; each output
        // constraint corresponds to one struct element.
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, STy->getElementType(ResNo))
                .getSimpleVT();
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      // Labels consume no call arguments, so skip the operand-type logic
      // below (which would otherwise advance ArgNo).
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // For indirect operands the relevant type is the pointee, carried by
        // the elementtype attribute on the call argument.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One operand cannot satisfy this alternative at all, so the
            // whole alternative is invalid.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        // Reject ties whose int/FP-ness differs or whose resolved register
        // classes differ; anything else cannot share a register.
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5914 
5915 /// Return a number indicating our preference for chosing a type of constraint
5916 /// over another, for the purpose of sorting them. Immediates are almost always
5917 /// preferrable (when they can be emitted). A higher return value means a
5918 /// stronger preference for one constraint type relative to another.
5919 /// FIXME: We should prefer registers over memory but doing so may lead to
5920 /// unrecoverable register exhaustion later.
5921 /// https://github.com/llvm/llvm-project/issues/20571
5922 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5923   switch (CT) {
5924   case TargetLowering::C_Immediate:
5925   case TargetLowering::C_Other:
5926     return 4;
5927   case TargetLowering::C_Memory:
5928   case TargetLowering::C_Address:
5929     return 3;
5930   case TargetLowering::C_RegisterClass:
5931     return 2;
5932   case TargetLowering::C_Register:
5933     return 1;
5934   case TargetLowering::C_Unknown:
5935     return 0;
5936   }
5937   llvm_unreachable("Invalid constraint type");
5938 }
5939 
5940 /// Examine constraint type and operand type and determine a weight value.
5941 /// This object must already have been set up with the operand type
5942 /// and the current alternative constraint selected.
5943 TargetLowering::ConstraintWeight
5944   TargetLowering::getMultipleConstraintMatchWeight(
5945     AsmOperandInfo &info, int maIndex) const {
5946   InlineAsm::ConstraintCodeVector *rCodes;
5947   if (maIndex >= (int)info.multipleAlternatives.size())
5948     rCodes = &info.Codes;
5949   else
5950     rCodes = &info.multipleAlternatives[maIndex].Codes;
5951   ConstraintWeight BestWeight = CW_Invalid;
5952 
5953   // Loop over the options, keeping track of the most general one.
5954   for (const std::string &rCode : *rCodes) {
5955     ConstraintWeight weight =
5956         getSingleConstraintMatchWeight(info, rCode.c_str());
5957     if (weight > BestWeight)
5958       BestWeight = weight;
5959   }
5960 
5961   return BestWeight;
5962 }
5963 
5964 /// Examine constraint type and operand type and determine a weight value.
5965 /// This object must already have been set up with the operand type
5966 /// and the current alternative constraint selected.
5967 TargetLowering::ConstraintWeight
5968   TargetLowering::getSingleConstraintMatchWeight(
5969     AsmOperandInfo &info, const char *constraint) const {
5970   ConstraintWeight weight = CW_Invalid;
5971   Value *CallOperandVal = info.CallOperandVal;
5972     // If we don't have a value, we can't do a match,
5973     // but allow it at the lowest weight.
5974   if (!CallOperandVal)
5975     return CW_Default;
5976   // Look at the constraint type.
5977   switch (*constraint) {
5978     case 'i': // immediate integer.
5979     case 'n': // immediate integer with a known value.
5980       if (isa<ConstantInt>(CallOperandVal))
5981         weight = CW_Constant;
5982       break;
5983     case 's': // non-explicit intregal immediate.
5984       if (isa<GlobalValue>(CallOperandVal))
5985         weight = CW_Constant;
5986       break;
5987     case 'E': // immediate float if host format.
5988     case 'F': // immediate float.
5989       if (isa<ConstantFP>(CallOperandVal))
5990         weight = CW_Constant;
5991       break;
5992     case '<': // memory operand with autodecrement.
5993     case '>': // memory operand with autoincrement.
5994     case 'm': // memory operand.
5995     case 'o': // offsettable memory operand
5996     case 'V': // non-offsettable memory operand
5997       weight = CW_Memory;
5998       break;
5999     case 'r': // general register.
6000     case 'g': // general register, memory operand or immediate integer.
6001               // note: Clang converts "g" to "imr".
6002       if (CallOperandVal->getType()->isIntegerTy())
6003         weight = CW_Register;
6004       break;
6005     case 'X': // any operand.
6006   default:
6007     weight = CW_Default;
6008     break;
6009   }
6010   return weight;
6011 }
6012 
6013 /// If there are multiple different constraints that we could pick for this
6014 /// operand (e.g. "imr") try to pick the 'best' one.
6015 /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6016 /// into seven classes:
6017 ///    Register      -> one specific register
6018 ///    RegisterClass -> a group of regs
6019 ///    Memory        -> memory
6020 ///    Address       -> a symbolic memory reference
6021 ///    Immediate     -> immediate values
6022 ///    Other         -> magic values (such as "Flag Output Operands")
6023 ///    Unknown       -> something we don't recognize yet and can't handle
6024 /// Ideally, we would pick the most specific constraint possible: if we have
6025 /// something that fits into a register, we would pick it.  The problem here
6026 /// is that if we have something that could either be in a register or in
6027 /// memory that use of the register could cause selection of *other*
6028 /// operands to fail: they might only succeed if we pick memory.  Because of
6029 /// this the heuristic we use is:
6030 ///
6031 ///  1) If there is an 'other' constraint, and if the operand is valid for
6032 ///     that constraint, use it.  This makes us take advantage of 'i'
6033 ///     constraints when available.
6034 ///  2) Otherwise, pick the most general constraint present.  This prefers
6035 ///     'm' over 'r', for example.
6036 ///
6037 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6038     TargetLowering::AsmOperandInfo &OpInfo) const {
6039   ConstraintGroup Ret;
6040 
6041   Ret.reserve(OpInfo.Codes.size());
6042   for (StringRef Code : OpInfo.Codes) {
6043     TargetLowering::ConstraintType CType = getConstraintType(Code);
6044 
6045     // Indirect 'other' or 'immediate' constraints are not allowed.
6046     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6047                                CType == TargetLowering::C_Register ||
6048                                CType == TargetLowering::C_RegisterClass))
6049       continue;
6050 
6051     // Things with matching constraints can only be registers, per gcc
6052     // documentation.  This mainly affects "g" constraints.
6053     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6054       continue;
6055 
6056     Ret.emplace_back(Code, CType);
6057   }
6058 
6059   std::stable_sort(
6060       Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6061         return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6062       });
6063 
6064   return Ret;
6065 }
6066 
6067 /// If we have an immediate, see if we can lower it. Return true if we can,
6068 /// false otherwise.
6069 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6070                                      SDValue Op, SelectionDAG *DAG,
6071                                      const TargetLowering &TLI) {
6072 
6073   assert((P.second == TargetLowering::C_Other ||
6074           P.second == TargetLowering::C_Immediate) &&
6075          "need immediate or other");
6076 
6077   if (!Op.getNode())
6078     return false;
6079 
6080   std::vector<SDValue> ResultOps;
6081   TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6082   return !ResultOps.empty();
6083 }
6084 
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    unsigned BestIdx = 0;
    // G is sorted best-first, with immediate/other constraints at the front.
    // Walk those leading immediate-like entries and take the first that
    // actually lowers for this operand; if none of them lowers, fall back to
    // index 0 (the overall most-preferred constraint).
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Labels and block addresses can always be materialized as immediates.
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6142 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // For each divisor element, split it into an odd factor and a power-of-two
  // shift; record the per-element shift amount and the modular inverse of the
  // odd factor. Returns false (no transform) for a zero divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      // The trailing zero bits are handled with an arithmetic shift of the
      // numerator, which is exact because the division is exact.
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // The remaining odd divisor is inverted modulo 2^BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Reassemble the collected per-element values into operands matching the
  // shape of the divisor (build_vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;
  if (UseSRA) {
    // Shift out the power-of-two part first; marked exact since no set low
    // bits are discarded.
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  // Multiply by the inverse of the odd factor to complete the division.
  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6202 
/// Given an exact UDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRL = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // For each divisor element, split it into an odd factor and a power-of-two
  // shift; record the per-element shift amount and the modular inverse of the
  // odd factor. Returns false (no transform) for a zero divisor.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      // Trailing zero bits become a logical shift of the numerator, which is
      // exact because the division is exact.
      Divisor.lshrInPlace(Shift);
      UseSRL = true;
    }
    // Calculate the multiplicative inverse modulo BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  SDValue Op1 = N->getOperand(1);

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
    return SDValue();

  // Reassemble the collected per-element values into operands matching the
  // shape of the divisor (build_vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = N->getOperand(0);
  if (UseSRL) {
    // Shift out the power-of-two part first; marked exact since no set low
    // bits are discarded.
    Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  // Multiply by the inverse of the odd factor to complete the division.
  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6263 
6264 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6265                               SelectionDAG &DAG,
6266                               SmallVectorImpl<SDNode *> &Created) const {
6267   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6268   if (isIntDivCheap(N->getValueType(0), Attr))
6269     return SDValue(N, 0); // Lower SDIV as SDIV
6270   return SDValue();
6271 }
6272 
6273 SDValue
6274 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6275                               SelectionDAG &DAG,
6276                               SmallVectorImpl<SDNode *> &Created) const {
6277   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6278   if (isIntDivCheap(N->getValueType(0), Attr))
6279     return SDValue(N, 0); // Lower SREM as SREM
6280   return SDValue();
6281 }
6282 
6283 /// Build sdiv by power-of-2 with conditional move instructions
6284 /// Ref: "Hacker's Delight" by Henry Warren 10-1
6285 /// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6286 ///   bgez x, label
6287 ///   add x, x, 2**k-1
6288 /// label:
6289 ///   sra res, x, k
6290 ///   neg res, res (when the divisor is negative)
6291 SDValue TargetLowering::buildSDIVPow2WithCMov(
6292     SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6293     SmallVectorImpl<SDNode *> &Created) const {
6294   unsigned Lg2 = Divisor.countr_zero();
6295   EVT VT = N->getValueType(0);
6296 
6297   SDLoc DL(N);
6298   SDValue N0 = N->getOperand(0);
6299   SDValue Zero = DAG.getConstant(0, DL, VT);
6300   APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6301   SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6302 
6303   // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6304   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6305   SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6306   SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6307   SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6308 
6309   Created.push_back(Cmp.getNode());
6310   Created.push_back(Add.getNode());
6311   Created.push_back(CMov.getNode());
6312 
6313   // Divide by pow2.
6314   SDValue SRA =
6315       DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6316 
6317   // If we're dividing by a positive value, we're done.  Otherwise, we must
6318   // negate the result.
6319   if (Divisor.isNonNegative())
6320     return SRA;
6321 
6322   Created.push_back(SRA.getNode());
6323   return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6324 }
6325 
6326 /// Given an ISD::SDIV node expressing a divide by constant,
6327 /// return a DAG expression to select that will generate the same value by
6328 /// multiplying by a magic number.
6329 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6330 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6331                                   bool IsAfterLegalization,
6332                                   bool IsAfterLegalTypes,
6333                                   SmallVectorImpl<SDNode *> &Created) const {
6334   SDLoc dl(N);
6335   EVT VT = N->getValueType(0);
6336   EVT SVT = VT.getScalarType();
6337   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6338   EVT ShSVT = ShVT.getScalarType();
6339   unsigned EltBits = VT.getScalarSizeInBits();
6340   EVT MulVT;
6341 
6342   // Check to see if we can do this.
6343   // FIXME: We should be more aggressive here.
6344   if (!isTypeLegal(VT)) {
6345     // Limit this to simple scalars for now.
6346     if (VT.isVector() || !VT.isSimple())
6347       return SDValue();
6348 
6349     // If this type will be promoted to a large enough type with a legal
6350     // multiply operation, we can go ahead and do this transform.
6351     if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6352       return SDValue();
6353 
6354     MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6355     if (MulVT.getSizeInBits() < (2 * EltBits) ||
6356         !isOperationLegal(ISD::MUL, MulVT))
6357       return SDValue();
6358   }
6359 
6360   // If the sdiv has an 'exact' bit we can use a simpler lowering.
6361   if (N->getFlags().hasExact())
6362     return BuildExactSDIV(*this, N, dl, DAG, Created);
6363 
6364   SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6365 
6366   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6367     if (C->isZero())
6368       return false;
6369 
6370     const APInt &Divisor = C->getAPIntValue();
6371     SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6372     int NumeratorFactor = 0;
6373     int ShiftMask = -1;
6374 
6375     if (Divisor.isOne() || Divisor.isAllOnes()) {
6376       // If d is +1/-1, we just multiply the numerator by +1/-1.
6377       NumeratorFactor = Divisor.getSExtValue();
6378       magics.Magic = 0;
6379       magics.ShiftAmount = 0;
6380       ShiftMask = 0;
6381     } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6382       // If d > 0 and m < 0, add the numerator.
6383       NumeratorFactor = 1;
6384     } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6385       // If d < 0 and m > 0, subtract the numerator.
6386       NumeratorFactor = -1;
6387     }
6388 
6389     MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6390     Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6391     Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6392     ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6393     return true;
6394   };
6395 
6396   SDValue N0 = N->getOperand(0);
6397   SDValue N1 = N->getOperand(1);
6398 
6399   // Collect the shifts / magic values from each element.
6400   if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6401     return SDValue();
6402 
6403   SDValue MagicFactor, Factor, Shift, ShiftMask;
6404   if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6405     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6406     Factor = DAG.getBuildVector(VT, dl, Factors);
6407     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6408     ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6409   } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6410     assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6411            Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6412            "Expected matchUnaryPredicate to return one element for scalable "
6413            "vectors");
6414     MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6415     Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6416     Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6417     ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6418   } else {
6419     assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6420     MagicFactor = MagicFactors[0];
6421     Factor = Factors[0];
6422     Shift = Shifts[0];
6423     ShiftMask = ShiftMasks[0];
6424   }
6425 
6426   // Multiply the numerator (operand 0) by the magic value.
6427   // FIXME: We should support doing a MUL in a wider type.
6428   auto GetMULHS = [&](SDValue X, SDValue Y) {
6429     // If the type isn't legal, use a wider mul of the type calculated
6430     // earlier.
6431     if (!isTypeLegal(VT)) {
6432       X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6433       Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6434       Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6435       Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6436                       DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6437       return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6438     }
6439 
6440     if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6441       return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6442     if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6443       SDValue LoHi =
6444           DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6445       return SDValue(LoHi.getNode(), 1);
6446     }
6447     // If type twice as wide legal, widen and use a mul plus a shift.
6448     unsigned Size = VT.getScalarSizeInBits();
6449     EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6450     if (VT.isVector())
6451       WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6452                                 VT.getVectorElementCount());
6453     // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6454     // custom lowered. This is very expensive so avoid it at all costs for
6455     // constant divisors.
6456     if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6457          isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
6458         isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6459       X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6460       Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6461       Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6462       Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6463                       DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6464       return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6465     }
6466     return SDValue();
6467   };
6468 
6469   SDValue Q = GetMULHS(N0, MagicFactor);
6470   if (!Q)
6471     return SDValue();
6472 
6473   Created.push_back(Q.getNode());
6474 
6475   // (Optionally) Add/subtract the numerator using Factor.
6476   Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6477   Created.push_back(Factor.getNode());
6478   Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6479   Created.push_back(Q.getNode());
6480 
6481   // Shift right algebraic by shift value.
6482   Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6483   Created.push_back(Q.getNode());
6484 
6485   // Extract the sign bit, mask it and add it to the quotient.
6486   SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6487   SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6488   Created.push_back(T.getNode());
6489   T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6490   Created.push_back(T.getNode());
6491   return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6492 }
6493 
6494 /// Given an ISD::UDIV node expressing a divide by constant,
6495 /// return a DAG expression to select that will generate the same value by
6496 /// multiplying by a magic number.
6497 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  // Shift amounts use the target's preferred shift-amount type.
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider integer type used for the multiply when VT itself is not legal but
  // is promoted to a large-enough type (initialized below in that case only).
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Compute the magic-number constants for one divisor element. Returning
  // false aborts the whole transform (e.g. a divide-by-zero lane).
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(
              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // For the add-fixup (NPQ) lanes, a MULHU by 2^(EltBits-1) acts as a
      // logical shift right by one; zero multiplies the non-NPQ lanes away
      // (see the UseNPQ block below).
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    // NPQFactor is intentionally left null here: the scalar NPQ path below
    // uses a plain SRL-by-1 instead of a MULHU by NPQFactor.
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high half of the EltBits x EltBits unsigned multiply of
  // X and Y, or a null SDValue if no way to emit it is available.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
         isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // The magic-number path is wrong for lanes dividing by 1 (see the UNDEF
  // placeholders above); select the raw numerator for those lanes instead.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6693 
6694 /// If all values in Values that *don't* match the predicate are same 'splat'
6695 /// value, then replace all values with that splat value.
6696 /// Else, if AlternativeReplacement was provided, then replace all values that
6697 /// do match predicate with AlternativeReplacement value.
6698 static void
6699 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6700                           std::function<bool(SDValue)> Predicate,
6701                           SDValue AlternativeReplacement = SDValue()) {
6702   SDValue Replacement;
6703   // Is there a value for which the Predicate does *NOT* match? What is it?
6704   auto SplatValue = llvm::find_if_not(Values, Predicate);
6705   if (SplatValue != Values.end()) {
6706     // Does Values consist only of SplatValue's and values matching Predicate?
6707     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6708           return Value == *SplatValue || Predicate(Value);
6709         })) // Then we shall replace values matching predicate with SplatValue.
6710       Replacement = *SplatValue;
6711   }
6712   if (!Replacement) {
6713     // Oops, we did not find the "baseline" splat value.
6714     if (!AlternativeReplacement)
6715       return; // Nothing to do.
6716     // Let's replace with provided value then.
6717     Replacement = AlternativeReplacement;
6718   }
6719   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6720 }
6721 
6722 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6723 /// where the divisor is constant and the comparison target is zero,
6724 /// return a DAG expression that will generate the same comparison result
6725 /// using only multiplications, additions and shifts/rotations.
6726 /// Ref: "Hacker's Delight" 10-17.
6727 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6728                                         SDValue CompTargetNode,
6729                                         ISD::CondCode Cond,
6730                                         DAGCombinerInfo &DCI,
6731                                         const SDLoc &DL) const {
6732   SmallVector<SDNode *, 5> Built;
6733   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6734                                          DCI, DL, Built)) {
6735     for (SDNode *N : Built)
6736       DCI.AddToWorklist(N);
6737     return Folded;
6738   }
6739 
6740   return SDValue();
6741 }
6742 
6743 SDValue
6744 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6745                                   SDValue CompTargetNode, ISD::CondCode Cond,
6746                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6747                                   SmallVectorImpl<SDNode *> &Created) const {
6748   // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6749   // - D must be constant, with D = D0 * 2^K where D0 is odd
6750   // - P is the multiplicative inverse of D0 modulo 2^W
6751   // - Q = floor(((2^W) - 1) / D)
6752   // where W is the width of the common type of N and D.
6753   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6754          "Only applicable for (in)equality comparisons.");
6755 
6756   SelectionDAG &DAG = DCI.DAG;
6757 
6758   EVT VT = REMNode.getValueType();
6759   EVT SVT = VT.getScalarType();
6760   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6761   EVT ShSVT = ShVT.getScalarType();
6762 
6763   // If MUL is unavailable, we cannot proceed in any case.
6764   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6765     return SDValue();
6766 
6767   bool ComparingWithAllZeros = true;
6768   bool AllComparisonsWithNonZerosAreTautological = true;
6769   bool HadTautologicalLanes = false;
6770   bool AllLanesAreTautological = true;
6771   bool HadEvenDivisor = false;
6772   bool AllDivisorsArePowerOfTwo = true;
6773   bool HadTautologicalInvertedLanes = false;
6774   SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6775 
6776   auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6777     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6778     if (CDiv->isZero())
6779       return false;
6780 
6781     const APInt &D = CDiv->getAPIntValue();
6782     const APInt &Cmp = CCmp->getAPIntValue();
6783 
6784     ComparingWithAllZeros &= Cmp.isZero();
6785 
6786     // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6787     // if C2 is not less than C1, the comparison is always false.
6788     // But we will only be able to produce the comparison that will give the
6789     // opposive tautological answer. So this lane would need to be fixed up.
6790     bool TautologicalInvertedLane = D.ule(Cmp);
6791     HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6792 
6793     // If all lanes are tautological (either all divisors are ones, or divisor
6794     // is not greater than the constant we are comparing with),
6795     // we will prefer to avoid the fold.
6796     bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6797     HadTautologicalLanes |= TautologicalLane;
6798     AllLanesAreTautological &= TautologicalLane;
6799 
6800     // If we are comparing with non-zero, we need'll need  to subtract said
6801     // comparison value from the LHS. But there is no point in doing that if
6802     // every lane where we are comparing with non-zero is tautological..
6803     if (!Cmp.isZero())
6804       AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6805 
6806     // Decompose D into D0 * 2^K
6807     unsigned K = D.countr_zero();
6808     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6809     APInt D0 = D.lshr(K);
6810 
6811     // D is even if it has trailing zeros.
6812     HadEvenDivisor |= (K != 0);
6813     // D is a power-of-two if D0 is one.
6814     // If all divisors are power-of-two, we will prefer to avoid the fold.
6815     AllDivisorsArePowerOfTwo &= D0.isOne();
6816 
6817     // P = inv(D0, 2^W)
6818     // 2^W requires W + 1 bits, so we have to extend and then truncate.
6819     unsigned W = D.getBitWidth();
6820     APInt P = D0.multiplicativeInverse();
6821     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6822 
6823     // Q = floor((2^W - 1) u/ D)
6824     // R = ((2^W - 1) u% D)
6825     APInt Q, R;
6826     APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
6827 
6828     // If we are comparing with zero, then that comparison constant is okay,
6829     // else it may need to be one less than that.
6830     if (Cmp.ugt(R))
6831       Q -= 1;
6832 
6833     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6834            "We are expecting that K is always less than all-ones for ShSVT");
6835 
6836     // If the lane is tautological the result can be constant-folded.
6837     if (TautologicalLane) {
6838       // Set P and K amount to a bogus values so we can try to splat them.
6839       P = 0;
6840       K = -1;
6841       // And ensure that comparison constant is tautological,
6842       // it will always compare true/false.
6843       Q = -1;
6844     }
6845 
6846     PAmts.push_back(DAG.getConstant(P, DL, SVT));
6847     KAmts.push_back(
6848         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
6849                               /*implicitTrunc=*/true),
6850                         DL, ShSVT));
6851     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6852     return true;
6853   };
6854 
6855   SDValue N = REMNode.getOperand(0);
6856   SDValue D = REMNode.getOperand(1);
6857 
6858   // Collect the values from each element.
6859   if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6860     return SDValue();
6861 
6862   // If all lanes are tautological, the result can be constant-folded.
6863   if (AllLanesAreTautological)
6864     return SDValue();
6865 
6866   // If this is a urem by a powers-of-two, avoid the fold since it can be
6867   // best implemented as a bit test.
6868   if (AllDivisorsArePowerOfTwo)
6869     return SDValue();
6870 
6871   SDValue PVal, KVal, QVal;
6872   if (D.getOpcode() == ISD::BUILD_VECTOR) {
6873     if (HadTautologicalLanes) {
6874       // Try to turn PAmts into a splat, since we don't care about the values
6875       // that are currently '0'. If we can't, just keep '0'`s.
6876       turnVectorIntoSplatVector(PAmts, isNullConstant);
6877       // Try to turn KAmts into a splat, since we don't care about the values
6878       // that are currently '-1'. If we can't, change them to '0'`s.
6879       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6880                                 DAG.getConstant(0, DL, ShSVT));
6881     }
6882 
6883     PVal = DAG.getBuildVector(VT, DL, PAmts);
6884     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6885     QVal = DAG.getBuildVector(VT, DL, QAmts);
6886   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6887     assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6888            "Expected matchBinaryPredicate to return one element for "
6889            "SPLAT_VECTORs");
6890     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6891     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6892     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6893   } else {
6894     PVal = PAmts[0];
6895     KVal = KAmts[0];
6896     QVal = QAmts[0];
6897   }
6898 
6899   if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6900     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6901       return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6902     assert(CompTargetNode.getValueType() == N.getValueType() &&
6903            "Expecting that the types on LHS and RHS of comparisons match.");
6904     N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6905   }
6906 
6907   // (mul N, P)
6908   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6909   Created.push_back(Op0.getNode());
6910 
6911   // Rotate right only if any divisor was even. We avoid rotates for all-odd
6912   // divisors as a performance improvement, since rotating by 0 is a no-op.
6913   if (HadEvenDivisor) {
6914     // We need ROTR to do this.
6915     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6916       return SDValue();
6917     // UREM: (rotr (mul N, P), K)
6918     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6919     Created.push_back(Op0.getNode());
6920   }
6921 
6922   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6923   SDValue NewCC =
6924       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6925                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6926   if (!HadTautologicalInvertedLanes)
6927     return NewCC;
6928 
6929   // If any lanes previously compared always-false, the NewCC will give
6930   // always-true result for them, so we need to fixup those lanes.
6931   // Or the other way around for inequality predicate.
6932   assert(VT.isVector() && "Can/should only get here for vectors.");
6933   Created.push_back(NewCC.getNode());
6934 
6935   // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6936   // if C2 is not less than C1, the comparison is always false.
6937   // But we have produced the comparison that will give the
6938   // opposive tautological answer. So these lanes would need to be fixed up.
6939   SDValue TautologicalInvertedChannels =
6940       DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6941   Created.push_back(TautologicalInvertedChannels.getNode());
6942 
6943   // NOTE: we avoid letting illegal types through even if we're before legalize
6944   // ops – legalization has a hard time producing good code for this.
6945   if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6946     // If we have a vector select, let's replace the comparison results in the
6947     // affected lanes with the correct tautological result.
6948     SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6949                                               DL, SETCCVT, SETCCVT);
6950     return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6951                        Replacement, NewCC);
6952   }
6953 
6954   // Else, we can just invert the comparison result in the appropriate lanes.
6955   //
6956   // NOTE: see the note above VSELECT above.
6957   if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6958     return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6959                        TautologicalInvertedChannels);
6960 
6961   return SDValue(); // Don't know how to lower.
6962 }
6963 
6964 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6965 /// where the divisor is constant and the comparison target is zero,
6966 /// return a DAG expression that will generate the same comparison result
6967 /// using only multiplications, additions and shifts/rotations.
6968 /// Ref: "Hacker's Delight" 10-17.
6969 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6970                                         SDValue CompTargetNode,
6971                                         ISD::CondCode Cond,
6972                                         DAGCombinerInfo &DCI,
6973                                         const SDLoc &DL) const {
6974   SmallVector<SDNode *, 7> Built;
6975   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6976                                          DCI, DL, Built)) {
6977     assert(Built.size() <= 7 && "Max size prediction failed.");
6978     for (SDNode *N : Built)
6979       DCI.AddToWorklist(N);
6980     return Folded;
6981   }
6982 
6983   return SDValue();
6984 }
6985 
6986 SDValue
6987 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6988                                   SDValue CompTargetNode, ISD::CondCode Cond,
6989                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6990                                   SmallVectorImpl<SDNode *> &Created) const {
6991   // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6992   // Fold:
6993   //   (seteq/ne (srem N, D), 0)
6994   // To:
6995   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
6996   //
6997   // - D must be constant, with D = D0 * 2^K where D0 is odd
6998   // - P is the multiplicative inverse of D0 modulo 2^W
6999   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7000   // - Q = floor((2 * A) / (2^K))
7001   // where W is the width of the common type of N and D.
7002   //
7003   // When D is a power of two (and thus D0 is 1), the normal
7004   // formula for A and Q don't apply, because the derivation
7005   // depends on D not dividing 2^(W-1), and thus theorem ZRS
7006   // does not apply. This specifically fails when N = INT_MIN.
7007   //
7008   // Instead, for power-of-two D, we use:
7009   // - A = 2^(W-1)
7010   // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7011   // - Q = 2^(W-K) - 1
7012   // |-> Test that the top K bits are zero after rotation
7013   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7014          "Only applicable for (in)equality comparisons.");
7015 
7016   SelectionDAG &DAG = DCI.DAG;
7017 
7018   EVT VT = REMNode.getValueType();
7019   EVT SVT = VT.getScalarType();
7020   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7021   EVT ShSVT = ShVT.getScalarType();
7022 
7023   // If we are after ops legalization, and MUL is unavailable, we can not
7024   // proceed.
7025   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7026     return SDValue();
7027 
7028   // TODO: Could support comparing with non-zero too.
7029   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7030   if (!CompTarget || !CompTarget->isZero())
7031     return SDValue();
7032 
7033   bool HadIntMinDivisor = false;
7034   bool HadOneDivisor = false;
7035   bool AllDivisorsAreOnes = true;
7036   bool HadEvenDivisor = false;
7037   bool NeedToApplyOffset = false;
7038   bool AllDivisorsArePowerOfTwo = true;
7039   SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7040 
7041   auto BuildSREMPattern = [&](ConstantSDNode *C) {
7042     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7043     if (C->isZero())
7044       return false;
7045 
7046     // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7047 
7048     // WARNING: this fold is only valid for positive divisors!
7049     APInt D = C->getAPIntValue();
7050     if (D.isNegative())
7051       D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`
7052 
7053     HadIntMinDivisor |= D.isMinSignedValue();
7054 
7055     // If all divisors are ones, we will prefer to avoid the fold.
7056     HadOneDivisor |= D.isOne();
7057     AllDivisorsAreOnes &= D.isOne();
7058 
7059     // Decompose D into D0 * 2^K
7060     unsigned K = D.countr_zero();
7061     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7062     APInt D0 = D.lshr(K);
7063 
7064     if (!D.isMinSignedValue()) {
7065       // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7066       // we don't care about this lane in this fold, we'll special-handle it.
7067       HadEvenDivisor |= (K != 0);
7068     }
7069 
7070     // D is a power-of-two if D0 is one. This includes INT_MIN.
7071     // If all divisors are power-of-two, we will prefer to avoid the fold.
7072     AllDivisorsArePowerOfTwo &= D0.isOne();
7073 
7074     // P = inv(D0, 2^W)
7075     // 2^W requires W + 1 bits, so we have to extend and then truncate.
7076     unsigned W = D.getBitWidth();
7077     APInt P = D0.multiplicativeInverse();
7078     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7079 
7080     // A = floor((2^(W - 1) - 1) / D0) & -2^K
7081     APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7082     A.clearLowBits(K);
7083 
7084     if (!D.isMinSignedValue()) {
7085       // If divisor INT_MIN, then we don't care about this lane in this fold,
7086       // we'll special-handle it.
7087       NeedToApplyOffset |= A != 0;
7088     }
7089 
7090     // Q = floor((2 * A) / (2^K))
7091     APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7092 
7093     assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7094            "We are expecting that A is always less than all-ones for SVT");
7095     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7096            "We are expecting that K is always less than all-ones for ShSVT");
7097 
7098     // If D was a power of two, apply the alternate constant derivation.
7099     if (D0.isOne()) {
7100       // A = 2^(W-1)
7101       A = APInt::getSignedMinValue(W);
7102       // - Q = 2^(W-K) - 1
7103       Q = APInt::getAllOnes(W - K).zext(W);
7104     }
7105 
7106     // If the divisor is 1 the result can be constant-folded. Likewise, we
7107     // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7108     if (D.isOne()) {
7109       // Set P, A and K to a bogus values so we can try to splat them.
7110       P = 0;
7111       A = -1;
7112       K = -1;
7113 
7114       // x ?% 1 == 0  <-->  true  <-->  x u<= -1
7115       Q = -1;
7116     }
7117 
7118     PAmts.push_back(DAG.getConstant(P, DL, SVT));
7119     AAmts.push_back(DAG.getConstant(A, DL, SVT));
7120     KAmts.push_back(
7121         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7122                               /*implicitTrunc=*/true),
7123                         DL, ShSVT));
7124     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7125     return true;
7126   };
7127 
7128   SDValue N = REMNode.getOperand(0);
7129   SDValue D = REMNode.getOperand(1);
7130 
7131   // Collect the values from each element.
7132   if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7133     return SDValue();
7134 
7135   // If this is a srem by a one, avoid the fold since it can be constant-folded.
7136   if (AllDivisorsAreOnes)
7137     return SDValue();
7138 
7139   // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7140   // since it can be best implemented as a bit test.
7141   if (AllDivisorsArePowerOfTwo)
7142     return SDValue();
7143 
7144   SDValue PVal, AVal, KVal, QVal;
7145   if (D.getOpcode() == ISD::BUILD_VECTOR) {
7146     if (HadOneDivisor) {
7147       // Try to turn PAmts into a splat, since we don't care about the values
7148       // that are currently '0'. If we can't, just keep '0'`s.
7149       turnVectorIntoSplatVector(PAmts, isNullConstant);
7150       // Try to turn AAmts into a splat, since we don't care about the
7151       // values that are currently '-1'. If we can't, change them to '0'`s.
7152       turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7153                                 DAG.getConstant(0, DL, SVT));
7154       // Try to turn KAmts into a splat, since we don't care about the values
7155       // that are currently '-1'. If we can't, change them to '0'`s.
7156       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7157                                 DAG.getConstant(0, DL, ShSVT));
7158     }
7159 
7160     PVal = DAG.getBuildVector(VT, DL, PAmts);
7161     AVal = DAG.getBuildVector(VT, DL, AAmts);
7162     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7163     QVal = DAG.getBuildVector(VT, DL, QAmts);
7164   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7165     assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7166            QAmts.size() == 1 &&
7167            "Expected matchUnaryPredicate to return one element for scalable "
7168            "vectors");
7169     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7170     AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7171     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7172     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7173   } else {
7174     assert(isa<ConstantSDNode>(D) && "Expected a constant");
7175     PVal = PAmts[0];
7176     AVal = AAmts[0];
7177     KVal = KAmts[0];
7178     QVal = QAmts[0];
7179   }
7180 
7181   // (mul N, P)
7182   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7183   Created.push_back(Op0.getNode());
7184 
7185   if (NeedToApplyOffset) {
7186     // We need ADD to do this.
7187     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7188       return SDValue();
7189 
7190     // (add (mul N, P), A)
7191     Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7192     Created.push_back(Op0.getNode());
7193   }
7194 
7195   // Rotate right only if any divisor was even. We avoid rotates for all-odd
7196   // divisors as a performance improvement, since rotating by 0 is a no-op.
7197   if (HadEvenDivisor) {
7198     // We need ROTR to do this.
7199     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7200       return SDValue();
7201     // SREM: (rotr (add (mul N, P), A), K)
7202     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7203     Created.push_back(Op0.getNode());
7204   }
7205 
7206   // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7207   SDValue Fold =
7208       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7209                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7210 
7211   // If we didn't have lanes with INT_MIN divisor, then we're done.
7212   if (!HadIntMinDivisor)
7213     return Fold;
7214 
7215   // That fold is only valid for positive divisors. Which effectively means,
7216   // it is invalid for INT_MIN divisors. So if we have such a lane,
7217   // we must fix-up results for said lanes.
7218   assert(VT.isVector() && "Can/should only get here for vectors.");
7219 
7220   // NOTE: we avoid letting illegal types through even if we're before legalize
7221   // ops – legalization has a hard time producing good code for the code that
7222   // follows.
7223   if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7224       !isOperationLegalOrCustom(ISD::AND, VT) ||
7225       !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7226       !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7227     return SDValue();
7228 
7229   Created.push_back(Fold.getNode());
7230 
7231   SDValue IntMin = DAG.getConstant(
7232       APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7233   SDValue IntMax = DAG.getConstant(
7234       APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7235   SDValue Zero =
7236       DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7237 
7238   // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7239   SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7240   Created.push_back(DivisorIsIntMin.getNode());
7241 
7242   // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
7243   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7244   Created.push_back(Masked.getNode());
7245   SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7246   Created.push_back(MaskedIsZero.getNode());
7247 
7248   // To produce final result we need to blend 2 vectors: 'SetCC' and
7249   // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7250   // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7251   // constant-folded, select can get lowered to a shuffle with constant mask.
7252   SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7253                                 MaskedIsZero, Fold);
7254 
7255   return Blended;
7256 }
7257 
7258 bool TargetLowering::
7259 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7260   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7261     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7262                                 "be a constant integer");
7263     return true;
7264   }
7265 
7266   return false;
7267 }
7268 
7269 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7270                                          const DenormalMode &Mode) const {
7271   SDLoc DL(Op);
7272   EVT VT = Op.getValueType();
7273   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7274   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7275 
7276   // This is specifically a check for the handling of denormal inputs, not the
7277   // result.
7278   if (Mode.Input == DenormalMode::PreserveSign ||
7279       Mode.Input == DenormalMode::PositiveZero) {
7280     // Test = X == 0.0
7281     return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7282   }
7283 
7284   // Testing it with denormal inputs to avoid wrong estimate.
7285   //
7286   // Test = fabs(X) < SmallestNormal
7287   const fltSemantics &FltSem = VT.getFltSemantics();
7288   APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7289   SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7290   SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7291   return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7292 }
7293 
/// Try to build the negated form of \p Op, reporting how expensive the
/// negation was via \p Cost. Returns an empty SDValue when no profitable
/// negated form could be produced. Nodes may be speculatively created and
/// later removed again if they turn out to be unused.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Helper: delete a speculatively-created negated candidate if nothing ended
  // up using it.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    // Legal either if constant BUILD_VECTORs are legal as a whole, or if every
    // negated immediate element is individually legal.
    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Rebuild the vector with every defined element sign-flipped; undef
    // elements are passed through unchanged.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // -(X + Y) is only (−X − Y) / (−Y − X) when signed zeros don't matter.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      // Only clean up the losing candidate if it isn't the node we return.
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    // The addend must be negatable in either fold below, so try it first.
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // These commute with negation: negate the operand and re-apply the op.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // Same as above, but FP_ROUND carries a second (truncation flag) operand.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    // Require both arms to be at most neutral and at least one strictly
    // cheaper, otherwise the transform is not a win.
    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7597 
7598 //===----------------------------------------------------------------------===//
7599 // Legalization Utilities
7600 //===----------------------------------------------------------------------===//
7601 
/// Expand a MUL / UMUL_LOHI / SMUL_LOHI of type \p VT into operations on the
/// half-width type \p HiLoVT, appending the result pieces to \p Result.
/// Callers may pre-supply the split operand halves LL/LH/RL/RH; otherwise they
/// are derived here via TRUNCATE/SRL. Returns false if the required half-width
/// multiply operations are unavailable.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Determine which half-width high-multiply forms we may emit.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a half-width widening multiply of L and R, producing Lo and Hi.
  // Prefer the fused *MUL_LOHI node, then fall back to MUL + MULH*.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Derive the low halves of the operands if the caller didn't supply them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // LOHI opcodes expect four pieces; the upper two are known zero.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  // Derive the high halves of the operands if the caller didn't supply them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Full schoolbook expansion. Start with the low*low partial product.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // For a plain MUL we only need the low OuterBitSize bits, so the cross
    // products contribute only their low halves (no carries needed).
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second cross product can overflow, so track the carry either
  // with the legacy glued ADDC/ADDE nodes or with UADDO_CARRY.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // high*high partial product; signed for SMUL_LOHI.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Propagate the carry from the previous addition into the top piece.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Fix up the unsigned result for negative operand halves: if a high half
    // is negative, subtract the other operand's (zero-extended) low half.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // Emit the remaining two half-width result pieces.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
7776 
7777 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7778                                SelectionDAG &DAG, MulExpansionKind Kind,
7779                                SDValue LL, SDValue LH, SDValue RL,
7780                                SDValue RH) const {
7781   SmallVector<SDValue, 2> Result;
7782   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7783                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7784                            DAG, Kind, LL, LH, RL, RH);
7785   if (Ok) {
7786     assert(Result.size() == 2);
7787     Lo = Result[0];
7788     Hi = Result[1];
7789   }
7790   return Ok;
7791 }
7792 
7793 // Optimize unsigned division or remainder by constants for types twice as large
7794 // as a legal VT.
7795 //
7796 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7797 // can be computed
7798 // as:
7799 //   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7800 //   Remainder = Sum % Constant
7801 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7802 //
7803 // For division, we can compute the remainder using the algorithm described
7804 // above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
7806 // (1 << (BitWidth / 2)) to get the quotient.
7807 
7808 // If Constant is even, we can shift right the dividend and the divisor by the
7809 // number of trailing zeros in Constant before applying the remainder algorithm.
7810 // If we're after the quotient, we can subtract this value from the shifted
7811 // dividend and multiply by the multiplicative inverse of the shifted divisor.
7812 // If we want the remainder, we shift the value left by the number of trailing
7813 // zeros and add the bits that were shifted out of the dividend.
7814 bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7815                                             SmallVectorImpl<SDValue> &Result,
7816                                             EVT HiLoVT, SelectionDAG &DAG,
7817                                             SDValue LL, SDValue LH) const {
7818   unsigned Opcode = N->getOpcode();
7819   EVT VT = N->getValueType(0);
7820 
7821   // TODO: Support signed division/remainder.
7822   if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7823     return false;
7824   assert(
7825       (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7826       "Unexpected opcode");
7827 
7828   auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7829   if (!CN)
7830     return false;
7831 
7832   APInt Divisor = CN->getAPIntValue();
7833   unsigned BitWidth = Divisor.getBitWidth();
7834   unsigned HBitWidth = BitWidth / 2;
7835   assert(VT.getScalarSizeInBits() == BitWidth &&
7836          HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7837 
  // Divisor needs to be less than (1 << HBitWidth).
7839   APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7840   if (Divisor.uge(HalfMaxPlus1))
7841     return false;
7842 
7843   // We depend on the UREM by constant optimization in DAGCombiner that requires
7844   // high multiply.
7845   if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7846       !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
7847     return false;
7848 
7849   // Don't expand if optimizing for size.
7850   if (DAG.shouldOptForSize())
7851     return false;
7852 
7853   // Early out for 0 or 1 divisors.
7854   if (Divisor.ule(1))
7855     return false;
7856 
7857   // If the divisor is even, shift it until it becomes odd.
7858   unsigned TrailingZeros = 0;
7859   if (!Divisor[0]) {
7860     TrailingZeros = Divisor.countr_zero();
7861     Divisor.lshrInPlace(TrailingZeros);
7862   }
7863 
7864   SDLoc dl(N);
7865   SDValue Sum;
7866   SDValue PartialRem;
7867 
7868   // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7869   // then add in the carry.
7870   // TODO: If we can't split it in half, we might be able to split into 3 or
7871   // more pieces using a smaller bit width.
7872   if (HalfMaxPlus1.urem(Divisor).isOne()) {
7873     assert(!LL == !LH && "Expected both input halves or no input halves!");
7874     if (!LL)
7875       std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7876 
7877     // Shift the input by the number of TrailingZeros in the divisor. The
7878     // shifted out bits will be added to the remainder later.
7879     if (TrailingZeros) {
7880       // Save the shifted off bits if we need the remainder.
7881       if (Opcode != ISD::UDIV) {
7882         APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7883         PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7884                                  DAG.getConstant(Mask, dl, HiLoVT));
7885       }
7886 
7887       LL = DAG.getNode(
7888           ISD::OR, dl, HiLoVT,
7889           DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7890                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7891           DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7892                       DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7893                                                  HiLoVT, dl)));
7894       LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7895                        DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7896     }
7897 
7898     // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7899     EVT SetCCType =
7900         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7901     if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
7902       SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7903       Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7904       Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7905                         DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7906     } else {
7907       Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7908       SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7909       // If the boolean for the target is 0 or 1, we can add the setcc result
7910       // directly.
7911       if (getBooleanContents(HiLoVT) ==
7912           TargetLoweringBase::ZeroOrOneBooleanContent)
7913         Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7914       else
7915         Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7916                               DAG.getConstant(0, dl, HiLoVT));
7917       Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7918     }
7919   }
7920 
7921   // If we didn't find a sum, we can't do the expansion.
7922   if (!Sum)
7923     return false;
7924 
7925   // Perform a HiLoVT urem on the Sum using truncated divisor.
7926   SDValue RemL =
7927       DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7928                   DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7929   SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7930 
7931   if (Opcode != ISD::UREM) {
7932     // Subtract the remainder from the shifted dividend.
7933     SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7934     SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7935 
7936     Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7937 
7938     // Multiply by the multiplicative inverse of the divisor modulo
7939     // (1 << BitWidth).
7940     APInt MulFactor = Divisor.multiplicativeInverse();
7941 
7942     SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7943                                    DAG.getConstant(MulFactor, dl, VT));
7944 
7945     // Split the quotient into low and high parts.
7946     SDValue QuotL, QuotH;
7947     std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7948     Result.push_back(QuotL);
7949     Result.push_back(QuotH);
7950   }
7951 
7952   if (Opcode != ISD::UDIV) {
7953     // If we shifted the input, shift the remainder left and add the bits we
7954     // shifted off the input.
7955     if (TrailingZeros) {
7956       APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7957       RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7958                          DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7959       RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7960     }
7961     Result.push_back(RemL);
7962     Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7963   }
7964 
7965   return true;
7966 }
7967 
7968 // Check that (every element of) Z is undef or not an exact multiple of BW.
7969 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7970   return ISD::matchUnaryPredicate(
7971       Z,
7972       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7973       true);
7974 }
7975 
// Expand ISD::VP_FSHL/VP_FSHR (vector-predicated funnel shifts) into a
// sequence of VP shift/logic nodes, threading the predicate mask and the
// explicit vector length through every intermediate node so lanes disabled
// by the caller stay disabled in the expansion.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;           // Shifted halves of X and Y to be OR'd together.
  SDValue ShAmt, InvShAmt;    // Effective shift amount and its complement.
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);    // Shift amount (modulo bit width).
  SDValue Mask = Node->getOperand(3); // VP predicate mask.
  SDValue VL = Node->getOperand(4);   // VP explicit vector length.

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // The amount is a non-zero constant (mod BW) in every defined lane, so
    // BW - C is in [1, BW-1] and both single shifts below are well-defined.
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // The amount may be zero, so BW - C could equal BW (an out-of-range
    // shift). Split the inverse shift into a fixed shift-by-one followed by
    // a shift by (BW - 1 - C), which is always in range.
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      // Non-power-of-two width: fall back to a real remainder/subtract.
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  // Combine the two shifted halves.
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
8032 
8033 SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8034                                           SelectionDAG &DAG) const {
8035   if (Node->isVPOpcode())
8036     return expandVPFunnelShift(Node, DAG);
8037 
8038   EVT VT = Node->getValueType(0);
8039 
8040   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8041                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
8042                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
8043                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8044     return SDValue();
8045 
8046   SDValue X = Node->getOperand(0);
8047   SDValue Y = Node->getOperand(1);
8048   SDValue Z = Node->getOperand(2);
8049 
8050   unsigned BW = VT.getScalarSizeInBits();
8051   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8052   SDLoc DL(SDValue(Node, 0));
8053 
8054   EVT ShVT = Z.getValueType();
8055 
8056   // If a funnel shift in the other direction is more supported, use it.
8057   unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8058   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8059       isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8060     if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8061       // fshl X, Y, Z -> fshr X, Y, -Z
8062       // fshr X, Y, Z -> fshl X, Y, -Z
8063       SDValue Zero = DAG.getConstant(0, DL, ShVT);
8064       Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8065     } else {
8066       // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8067       // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8068       SDValue One = DAG.getConstant(1, DL, ShVT);
8069       if (IsFSHL) {
8070         Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8071         X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8072       } else {
8073         X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8074         Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8075       }
8076       Z = DAG.getNOT(DL, Z, ShVT);
8077     }
8078     return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8079   }
8080 
8081   SDValue ShX, ShY;
8082   SDValue ShAmt, InvShAmt;
8083   if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8084     // fshl: X << C | Y >> (BW - C)
8085     // fshr: X << (BW - C) | Y >> C
8086     // where C = Z % BW is not zero
8087     SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8088     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8089     InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8090     ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8091     ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8092   } else {
8093     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8094     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8095     SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8096     if (isPowerOf2_32(BW)) {
8097       // Z % BW -> Z & (BW - 1)
8098       ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8099       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8100       InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8101     } else {
8102       SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8103       ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8104       InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8105     }
8106 
8107     SDValue One = DAG.getConstant(1, DL, ShVT);
8108     if (IsFSHL) {
8109       ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8110       SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8111       ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8112     } else {
8113       SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8114       ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8115       ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8116     }
8117   }
8118   return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8119 }
8120 
8121 // TODO: Merge with expandFunnelShift.
8122 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8123                                   SelectionDAG &DAG) const {
8124   EVT VT = Node->getValueType(0);
8125   unsigned EltSizeInBits = VT.getScalarSizeInBits();
8126   bool IsLeft = Node->getOpcode() == ISD::ROTL;
8127   SDValue Op0 = Node->getOperand(0);
8128   SDValue Op1 = Node->getOperand(1);
8129   SDLoc DL(SDValue(Node, 0));
8130 
8131   EVT ShVT = Op1.getValueType();
8132   SDValue Zero = DAG.getConstant(0, DL, ShVT);
8133 
8134   // If a rotate in the other direction is more supported, use it.
8135   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8136   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8137       isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8138     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8139     return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8140   }
8141 
8142   if (!AllowVectorOps && VT.isVector() &&
8143       (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8144        !isOperationLegalOrCustom(ISD::SRL, VT) ||
8145        !isOperationLegalOrCustom(ISD::SUB, VT) ||
8146        !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8147        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8148     return SDValue();
8149 
8150   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8151   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8152   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8153   SDValue ShVal;
8154   SDValue HsVal;
8155   if (isPowerOf2_32(EltSizeInBits)) {
8156     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8157     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8158     SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8159     SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8160     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8161     SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8162     HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8163   } else {
8164     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8165     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8166     SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8167     SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8168     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8169     SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8170     SDValue One = DAG.getConstant(1, DL, ShVT);
8171     HsVal =
8172         DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8173   }
8174   return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8175 }
8176 
/// Expand ISD::SHL_PARTS / SRL_PARTS / SRA_PARTS — a double-width shift
/// performed on a {lo, hi} pair of parts — into FSHL/FSHR, single shifts and
/// selects. The low and high result parts are returned via \p Lo and \p Hi.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0); // Low input part.
  SDValue ShOpHi = Node->getOperand(1); // High input part.
  SDValue ShAmt = Node->getOperand(2);  // Total shift amount.
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 is the part that is entirely shifted away for amounts >= VTBits:
  // for arithmetic right shifts that is the sign-fill of the high part,
  // otherwise zero.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2: result part combining both input parts via a funnel shift
  // (valid for amounts < VTBits). Tmp3: the single-part shift result.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  // Cond is true when ShAmt >= VTBits (the VTBits bit is set); in that case
  // the single-part shift/fill values replace the funnel-shift results.
  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8228 
/// Expand FP_TO_SINT by decomposing the float's bit pattern into sign,
/// exponent and mantissa with integer operations. Writes the expansion to
/// \p Result and returns true on success; returns false when the node is
/// strict-FP or the types are not the supported f32 -> i64 pair.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Strict-FP nodes carry the chain in operand 0; the FP value follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field masks/positions: 8 exponent bits at
  // bit 23 with bias 127, 23 mantissa bits.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as raw bits.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign is broadcast to all bits: 0 for positive, -1 for negative
  // (arithmetic shift of the sign bit down to bit 0), then sign-extended
  // to the destination width.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Reconstruct the significand: mantissa bits plus the implicit leading 1
  // (bit 23) of a normalized float.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand: shift left by (Exponent - 23) when the value has
  // more integer bits than mantissa bits, otherwise shift right (truncating
  // the fraction) by (23 - Exponent).
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |value| < 1, so the result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8299 
/// Expand FP_TO_UINT (and STRICT_FP_TO_UINT) in terms of FP_TO_SINT.
/// Writes the expansion to \p Result (and the new chain to \p Chain for
/// strict nodes) and returns true on success.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict-FP nodes carry the chain in operand 0; the FP value follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst is the destination signmask (2^(DstBits-1)) as a float; APF holds
  // that converted value from the check above.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  // Sel: Src is below the signmask, i.e. representable by signed convert.
  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    // XOR with the sign mask re-adds the offset for the out-of-range case.
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // XOR with the sign mask adds 2^(DstBits-1) back to the biased result.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8401 
/// Expand UINT_TO_FP for i64 -> f64 using the split-halves technique from
/// compiler-rt's __floatundidf. Writes the expansion to \p Result and returns
/// true on success; \p Chain is unused since strict nodes are rejected.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  // If the input is known to be non-negative and SINT_TO_FP is legal then use
  // it.
  if (Node->getFlags().hasNonNeg() &&
      isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
    Result =
        DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
    return true;
  }

  // The magic constants below are specific to the i64 -> f64 conversion.
  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit
  // operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt.  This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will
  // produce -0.0. This will be added to +0.0 and produce -0.0 which is
  // incorrect.
  //
  // The constants are the bit patterns of 2^52 and 2^84 as doubles: OR-ing
  // a 32-bit value into the mantissa of 2^52 (resp. 2^84) yields the exact
  // double 2^52 + lo (resp. 2^84 + hi * 2^32).
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);

  // Split the input into 32-bit low and high halves.
  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  // Embed each half in a double's mantissa via bitwise OR + bitcast.
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  // Subtract the combined biases (2^84 + 2^52), then add the halves: the
  // result is hi * 2^32 + lo, rounded once by the final fadd.
  SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
8460 
8461 SDValue
8462 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8463                                                SelectionDAG &DAG) const {
8464   unsigned Opcode = Node->getOpcode();
8465   assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8466           Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8467          "Wrong opcode");
8468 
8469   if (Node->getFlags().hasNoNaNs()) {
8470     ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8471     EVT VT = Node->getValueType(0);
8472     if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8473          !isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
8474         VT.isVector())
8475       return SDValue();
8476     SDValue Op1 = Node->getOperand(0);
8477     SDValue Op2 = Node->getOperand(1);
8478     SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8479     // Copy FMF flags, but always set the no-signed-zeros flag
8480     // as this is implied by the FMINNUM/FMAXNUM semantics.
8481     SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
8482     return SelCC;
8483   }
8484 
8485   return SDValue();
8486 }
8487 
/// Expand FMINNUM/FMAXNUM by trying, in order: splitting an illegal vector,
/// the IEEE variants (with sNaN quieting), FMINIMUM/FMAXIMUM when NaN and
/// signed-zero concerns don't apply, and finally a compare+select. Returns
/// SDValue() if none of the strategies is available.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(0);
  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT))
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
  }

  // Last resort: compare+select (only valid when the no-NaNs flag is set).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8543 
/// Expand FMINIMUM/FMAXIMUM (IEEE-754-2019 semantics: NaN propagates, and
/// -0.0 orders below +0.0) using whichever min/max or compare+select the
/// target supports, followed by explicit NaN and signed-zero fix-ups.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    // The IEEE variant already orders -0.0 < +0.0, so the signed-zero
    // fix-up below can be skipped.
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
  } else {
    // No vector select available either: scalarize the whole operation.
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    // SETUO is true iff either operand is NaN; substitute a quiet NaN then.
    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
                                        APFloat::getNaN(VT.getFltSemantics()));
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
    // If the result compares equal to 0.0 (either sign), check each operand
    // for the "winning" zero (+0 for max, -0 for min) and return it instead.
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
  }

  return MinMax;
}
8610 
/// Expand ISD::FMINIMUMNUM / ISD::FMAXIMUMNUM into operations the target
/// supports, trying progressively weaker native min/max nodes before falling
/// back to an explicit compare-and-select sequence.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(0);
  // Boolean result type for setcc nodes on VT.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = Node->getFlags();

  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  // Best case: the IEEE-754 2008 minNum/maxNum node is available; only sNaN
  // inputs need extra handling.
  if (isOperationLegalOrCustom(NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(LHS)) {
        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(RHS)) {
        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
      }
    }

    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
  // same behaviors for all of other cases: +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2019Op, VT))
      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
  }

  // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
       DAG.isKnownNeverZeroFloat(RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(IEEE2008Op, VT))
      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
  }

  // The generic expansion below requires per-element selects; without VSELECT
  // for this vector type, fall back to scalarizing the operation.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // If only one operand is NaN, override it with another operand.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
  }

  // After the NaN replacement above, an ordered compare selects the result.
  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
  // If MinMax is NaN, let's quiet it.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
      !DAG.isKnownNeverNaN(RHS)) {
    MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
  }

  // Fixup signed zero behavior.
  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
    return MinMax;
  }
  // If the result compares equal to 0.0, the operands may be {+0.0, -0.0}:
  // prefer whichever operand is the zero of the appropriate sign (+0.0 for
  // max, -0.0 for min), tested via IS_FPCLASS.
  SDValue TestZero =
      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
  SDValue LCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
      MinMax, Flags);
  SDValue RCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
      Flags);
  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
}
8698 
/// Returns a true value if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
///
/// \p Semantics and \p MF are consulted for the function's denormal input
/// mode, since flushing denormal inputs to zero changes which class tests a
/// compare-with-zero implements.
static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
                                           const fltSemantics &Semantics,
                                           const MachineFunction &MF) {
  FPClassTest OrderedMask = Test & ~fcNan;
  FPClassTest NanTest = Test & fcNan;
  bool IsOrdered = NanTest == fcNone;
  bool IsUnordered = NanTest == fcNan;

  // Skip cases that are testing for only a qnan or snan.
  if (!IsOrdered && !IsUnordered)
    return std::nullopt;

  // x == 0.0 is an exact zero-class test only while denormal inputs are kept
  // as-is (IEEE input mode).
  if (OrderedMask == fcZero &&
      MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
    return IsOrdered;
  // With denormal inputs flushed to zero, the same compare also accepts
  // subnormals, i.e. it implements fcZero | fcSubnormal.
  if (OrderedMask == (fcZero | fcSubnormal) &&
      MF.getDenormalMode(Semantics).inputsAreZero())
    return IsOrdered;
  return std::nullopt;
}
8722 
/// Expand an ISD::IS_FPCLASS test. When FP exceptions may be ignored, first
/// tries to lower the test to one or two floating-point compares; otherwise
/// bitcasts the operand to an integer of equal width and classifies it by
/// examining the sign/exponent/mantissa fields directly.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x87 extended precision has an explicit integer bit in the mantissa, which
  // several checks below must account for.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(FPTestMask, true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                             : UnorderedCmpOpcode,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
        (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
         (OperandVT.isVector() &&
          isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
                                          : UnorderedCmpOpcode,
                                OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
          OperandVT);
      return DAG.getSetCC(DL, ResultVT, Op, Inf,
                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                  OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
                            IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(IsFiniteOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(IsNormalOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // Accumulates the OR of all partial class checks computed below.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
9070 
9071 // Only expand vector types if we have the appropriate vector bit operations.
9072 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9073   assert(VT.isVector() && "Expected vector type");
9074   unsigned Len = VT.getScalarSizeInBits();
9075   return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9076          TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9077          TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9078          (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9079          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9080 }
9081 
/// Expand ISD::CTPOP using the parallel bit-summing technique ("best"
/// algorithm from the Bit Twiddling Hacks collection). Returns SDValue() for
/// element sizes the expansion does not handle (> 128 bits or not a multiple
/// of 8), or when a vector type lacks the required bitwise operations.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // At this point each byte of Op holds its own popcount; for an 8-bit
  // element that is already the answer.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                     DAG.getNode(ISD::ADD, dl, VT, Op,
                                 DAG.getNode(ISD::SRL, dl, VT, Op,
                                             DAG.getConstant(8, dl, ShVT))),
                     DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    // No usable multiply: accumulate the byte sums with a shift-and-add
    // chain; the total ends up in the top byte, extracted by the final SRL.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
9158 
9159 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9160   SDLoc dl(Node);
9161   EVT VT = Node->getValueType(0);
9162   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9163   SDValue Op = Node->getOperand(0);
9164   SDValue Mask = Node->getOperand(1);
9165   SDValue VL = Node->getOperand(2);
9166   unsigned Len = VT.getScalarSizeInBits();
9167   assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9168 
9169   // TODO: Add support for irregular type lengths.
9170   if (!(Len <= 128 && Len % 8 == 0))
9171     return SDValue();
9172 
9173   // This is same algorithm of expandCTPOP from
9174   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9175   SDValue Mask55 =
9176       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9177   SDValue Mask33 =
9178       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9179   SDValue Mask0F =
9180       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9181 
9182   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9183 
9184   // v = v - ((v >> 1) & 0x55555555...)
9185   Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9186                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9187                                  DAG.getConstant(1, dl, ShVT), Mask, VL),
9188                      Mask55, Mask, VL);
9189   Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9190 
9191   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9192   Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9193   Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9194                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9195                                  DAG.getConstant(2, dl, ShVT), Mask, VL),
9196                      Mask33, Mask, VL);
9197   Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9198 
9199   // v = (v + (v >> 4)) & 0x0F0F0F0F...
9200   Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9201                      Mask, VL),
9202   Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9203   Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9204 
9205   if (Len <= 8)
9206     return Op;
9207 
9208   // v = (v * 0x01010101...) >> (Len - 8)
9209   SDValue V;
9210   if (isOperationLegalOrCustomOrPromote(
9211           ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9212     SDValue Mask01 =
9213         DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9214     V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9215   } else {
9216     V = Op;
9217     for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9218       SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9219       V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9220                       DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9221                       Mask, VL);
9222     }
9223   }
9224   return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9225                      Mask, VL);
9226 }
9227 
/// Expand ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF. Prefers the other CTLZ flavor
/// when it is legal or custom; otherwise smears the leading set bit into all
/// lower positions and counts leading zeros as popcount of the complement.
/// Returns SDValue() when a vector type lacks the required operations.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // A zero input yields the bit width, as ISD::CTLZ requires.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
9277 
9278 SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9279   SDLoc dl(Node);
9280   EVT VT = Node->getValueType(0);
9281   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9282   SDValue Op = Node->getOperand(0);
9283   SDValue Mask = Node->getOperand(1);
9284   SDValue VL = Node->getOperand(2);
9285   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9286 
9287   // do this:
9288   // x = x | (x >> 1);
9289   // x = x | (x >> 2);
9290   // ...
9291   // x = x | (x >>16);
9292   // x = x | (x >>32); // for 64-bit input
9293   // return popcount(~x);
9294   for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9295     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9296     Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9297                      DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9298                      VL);
9299   }
9300   Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9301                    Mask, VL);
9302   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9303 }
9304 
/// Lower CTTZ via a de Bruijn multiply plus a byte-table lookup from the
/// constant pool:
///   Table[((Op & -Op) * DeBruijn) >> (BitWidth - log2(BitWidth))]
/// Only 32- and 64-bit widths are supported; returns SDValue() otherwise.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  // de Bruijn constants: multiplying by a single bit (1 << i) and taking the
  // top log2(BitWidth) bits yields a distinct index for every i.
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // Op & -Op isolates the lowest set bit of Op.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse mapping: de Bruijn index -> trailing zero count.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  // Index into the table with a byte-sized zext load.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  // For the ZERO_UNDEF flavor a zero input is undefined, so no fixup needed.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // Plain CTTZ: map a zero input to the bit width explicitly.
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9348 
/// Expand CTTZ/CTTZ_ZERO_UNDEF (count trailing zeros) for targets without
/// native support, preferring whichever related operations are legal.
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  // Plain CTTZ of zero is defined as the element bit width, so select that
  // value explicitly when the source is zero.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
      !isOperationLegal(ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) turns the trailing zeros of x into a mask of ones.
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
9404 
9405 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9406   SDValue Op = Node->getOperand(0);
9407   SDValue Mask = Node->getOperand(1);
9408   SDValue VL = Node->getOperand(2);
9409   SDLoc dl(Node);
9410   EVT VT = Node->getValueType(0);
9411 
9412   // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9413   SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9414                             DAG.getAllOnesConstant(dl, VT), Mask, VL);
9415   SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9416                                  DAG.getConstant(1, dl, VT), Mask, VL);
9417   SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9418   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9419 }
9420 
9421 SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9422                                              SelectionDAG &DAG) const {
9423   // %cond = to_bool_vec %source
9424   // %splat = splat /*val=*/VL
9425   // %tz = step_vector
9426   // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9427   // %r = vp.reduce.umin %v
9428   SDLoc DL(N);
9429   SDValue Source = N->getOperand(0);
9430   SDValue Mask = N->getOperand(1);
9431   SDValue EVL = N->getOperand(2);
9432   EVT SrcVT = Source.getValueType();
9433   EVT ResVT = N->getValueType(0);
9434   EVT ResVecVT =
9435       EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9436 
9437   // Convert to boolean vector.
9438   if (SrcVT.getScalarType() != MVT::i1) {
9439     SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9440     SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9441                              SrcVT.getVectorElementCount());
9442     Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9443                          DAG.getCondCode(ISD::SETNE), Mask, EVL);
9444   }
9445 
9446   SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9447   SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9448   SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9449   SDValue Select =
9450       DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9451   return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9452 }
9453 
9454 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9455                                   bool IsNegative) const {
9456   SDLoc dl(N);
9457   EVT VT = N->getValueType(0);
9458   SDValue Op = N->getOperand(0);
9459 
9460   // abs(x) -> smax(x,sub(0,x))
9461   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9462       isOperationLegal(ISD::SMAX, VT)) {
9463     SDValue Zero = DAG.getConstant(0, dl, VT);
9464     Op = DAG.getFreeze(Op);
9465     return DAG.getNode(ISD::SMAX, dl, VT, Op,
9466                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9467   }
9468 
9469   // abs(x) -> umin(x,sub(0,x))
9470   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9471       isOperationLegal(ISD::UMIN, VT)) {
9472     SDValue Zero = DAG.getConstant(0, dl, VT);
9473     Op = DAG.getFreeze(Op);
9474     return DAG.getNode(ISD::UMIN, dl, VT, Op,
9475                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9476   }
9477 
9478   // 0 - abs(x) -> smin(x, sub(0,x))
9479   if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9480       isOperationLegal(ISD::SMIN, VT)) {
9481     SDValue Zero = DAG.getConstant(0, dl, VT);
9482     Op = DAG.getFreeze(Op);
9483     return DAG.getNode(ISD::SMIN, dl, VT, Op,
9484                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9485   }
9486 
9487   // Only expand vector types if we have the appropriate vector operations.
9488   if (VT.isVector() &&
9489       (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9490        (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9491        (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9492        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9493     return SDValue();
9494 
9495   Op = DAG.getFreeze(Op);
9496   SDValue Shift = DAG.getNode(
9497       ISD::SRA, dl, VT, Op,
9498       DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9499   SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9500 
9501   // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9502   if (!IsNegative)
9503     return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9504 
9505   // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9506   return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9507 }
9508 
/// Expand ISD::ABDS/ISD::ABDU (signed/unsigned absolute difference) by trying
/// progressively more generic expansions: min/max, unsigned saturating
/// subtract, abs(sub) when the subtract provably cannot overflow, branchless
/// cmp/xor forms, and finally a select between the two subtract orders.
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // Freeze the operands: each appears more than once in the expansions below,
  // so every use must observe the same value.
  SDValue LHS = DAG.getFreeze(N->getOperand(0));
  SDValue RHS = DAG.getFreeze(N->getOperand(1));
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
    return DAG.getNode(ISD::OR, dl, VT,
                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));

  // If the subtract doesn't overflow then just use abs(sub())
  // NOTE: don't use frozen operands for value tracking.
  bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
                       DAG.SignBitIsZero(N->getOperand(0));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
                             N->getOperand(1)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
                             N->getOperand(0)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));

  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
  }

  // Similar to the branchless expansion, use the (sign-extended) usubo overflow
  // flag if the (scalar) type is illegal as this is more likely to legalize
  // cleanly:
  // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue USubO =
        DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
    SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
}
9582 
/// Expand the integer averaging nodes AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU,
/// avoiding a wider intermediate sum wherever possible.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  // Opcode selections used by the final fallback expansion (identities below).
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // A spare sign bit (signed) or leading zero (unsigned) means the addition
  // cannot overflow.
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
       DAG.ComputeNumSignBits(RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
    // Ceiling variants round up by adding 1 before the halving shift.
    if (!IsFloor)
      Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
    return DAG.getNode(ShiftOpc, dl, VT, Sum,
                       DAG.getShiftAmountConstant(1, VT, dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
    if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
      if (!IsFloor)
        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
                          DAG.getConstant(1, dl, ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
                        DAG.getShiftAmountConstant(1, ExtVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  // The UADDO carry bit becomes the MSB of the halved sum.
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue UAddWithOverflow =
        DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(0);
    SDValue Overflow = UAddWithOverflow.getValue(1);

    // Right shift the sum by 1
    SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
                                  DAG.getShiftAmountConstant(1, VT, dl));

    SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
    SDValue OverflowShl = DAG.getNode(
        ISD::SHL, dl, VT, ZeroExtOverflow,
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));

    return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Freeze the operands so their repeated uses see the same value.
  LHS = DAG.getFreeze(LHS);
  RHS = DAG.getFreeze(RHS);
  SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
  SDValue Shift =
      DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
  return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
}
9664 
/// Expand ISD::BSWAP by moving each byte of every scalar element to its
/// mirrored position with shifts, masks and ORs. Returns SDValue() for
/// non-simple types and for element widths other than 16/32/64 bits.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // Tmp4..Tmp1 hold input bytes 0..3 moved to byte positions 3..0; OR the
    // partial results together pairwise.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Tmp8..Tmp1 hold input bytes 0..7 moved to byte positions 7..0; combine
    // with a tree of ORs.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
9722 
/// Expand ISD::VP_BSWAP using VP shift/and/or nodes; every intermediate
/// operation is predicated by the incoming mask and EVL. Returns SDValue()
/// for non-simple types and for element widths other than 16/32/64 bits.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Swap the two bytes with a shift pair (no VP rotate is used here).
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    // Tmp4..Tmp1 hold input bytes 0..3 moved to byte positions 3..0.
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    // Tmp8..Tmp1 hold input bytes 0..7 moved to byte positions 7..0; combine
    // with a tree of VP_ORs.
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
9798 
/// Expand ISD::BITREVERSE. For power-of-2 widths of at least one byte this
/// byte-swaps first and then swaps nibbles, bit-pairs and single bits with
/// three mask/shift/or stages; otherwise it moves every bit individually.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Fallback for non-pow2/sub-byte widths: shift each bit I to its mirrored
  // position J = Sz-1-I, mask out position J, and OR all results together.
  // Emits O(Sz) nodes.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    APInt Shift = APInt::getOneBitSet(Sz, J);
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
9859 
/// Expand ISD::VP_BITREVERSE with the same bswap + staged nibble/pair/bit
/// swaps used by expandBITREVERSE, carried out under the VP mask and EVL.
/// Widths that are not a power of two of at least 8 bits are not handled
/// (returns SDValue()); there is no per-bit fallback for VP nodes.
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    return Tmp;
  }
  // Unsupported width: let the caller handle it some other way.
  return SDValue();
}
9921 
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  // Break a (possibly extending) vector load into per-element operations and
  // reassemble the result with BUILD_VECTOR. Returns the vector value and the
  // new chain.
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();   // Type of the data as it lives in memory.
  EVT DstVT = LD->getValueType(0); // Type of the value the load produces.
  ISD::LoadExtType ExtType = LD->getExtensionType();

  // Scalarization requires a compile-time-known element count.
  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    // Mask selecting one element's worth of low bits out of the wide load.
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 occupies the most significant bits of
      // the wide integer, so mirror the shift index.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
      // Shift the element down to bit 0, mask it out, and truncate to the
      // in-memory element type.
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Apply the original load's extension (sign/zero/any) per element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one (extending) scalar load per element.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge the per-element load chains and rebuild the vector value.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
10011 
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  // Break a vector store into per-element operations. Returns the token
  // factor (or single store) chaining the emitted stores.
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // Scalarization requires a compile-time-known element count.
  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // OR each truncated element into its bit position within the wide
    // integer.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // Store the assembled integer in a single operation.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The element stores are independent; merge their chains.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
10088 
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  // Expand a load the target cannot perform at the given alignment into a
  // sequence of smaller legal operations. Returns the loaded value and the
  // new chain.
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized chunks covering the loaded bytes, rounding up
    // (the final chunk may be partial).
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low half is always zero-extended; the
  // high half carries the original load's extension.  Endianness decides
  // which half sits at the lower address.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: Result = (Hi << NumBits) | Lo
  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
10238 
10239 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10240                                              SelectionDAG &DAG) const {
10241   assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10242          "unaligned indexed stores not implemented!");
10243   SDValue Chain = ST->getChain();
10244   SDValue Ptr = ST->getBasePtr();
10245   SDValue Val = ST->getValue();
10246   EVT VT = Val.getValueType();
10247   Align Alignment = ST->getOriginalAlign();
10248   auto &MF = DAG.getMachineFunction();
10249   EVT StoreMemVT = ST->getMemoryVT();
10250 
10251   SDLoc dl(ST);
10252   if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10253     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10254     if (isTypeLegal(intVT)) {
10255       if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10256           StoreMemVT.isVector()) {
10257         // Scalarize the store and let the individual components be handled.
10258         SDValue Result = scalarizeVectorStore(ST, DAG);
10259         return Result;
10260       }
10261       // Expand to a bitconvert of the value to the integer type of the
10262       // same size, then a (misaligned) int store.
10263       // FIXME: Does not handle truncating floating point stores!
10264       SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10265       Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10266                             Alignment, ST->getMemOperand()->getFlags());
10267       return Result;
10268     }
10269     // Do a (aligned) store to a stack slot, then copy from the stack slot
10270     // to the final destination using (unaligned) integer loads and stores.
10271     MVT RegVT = getRegisterType(
10272         *DAG.getContext(),
10273         EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10274     EVT PtrVT = Ptr.getValueType();
10275     unsigned StoredBytes = StoreMemVT.getStoreSize();
10276     unsigned RegBytes = RegVT.getSizeInBits() / 8;
10277     unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10278 
10279     // Make sure the stack slot is also aligned for the register type.
10280     SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10281     auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10282 
10283     // Perform the original store, only redirected to the stack slot.
10284     SDValue Store = DAG.getTruncStore(
10285         Chain, dl, Val, StackPtr,
10286         MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10287 
10288     EVT StackPtrVT = StackPtr.getValueType();
10289 
10290     SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10291     SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10292     SmallVector<SDValue, 8> Stores;
10293     unsigned Offset = 0;
10294 
10295     // Do all but one copies using the full register width.
10296     for (unsigned i = 1; i < NumRegs; i++) {
10297       // Load one integer register's worth from the stack slot.
10298       SDValue Load = DAG.getLoad(
10299           RegVT, dl, Store, StackPtr,
10300           MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10301       // Store it to the final location.  Remember the store.
10302       Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10303                                     ST->getPointerInfo().getWithOffset(Offset),
10304                                     ST->getOriginalAlign(),
10305                                     ST->getMemOperand()->getFlags()));
10306       // Increment the pointers.
10307       Offset += RegBytes;
10308       StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10309       Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10310     }
10311 
10312     // The last store may be partial.  Do a truncating store.  On big-endian
10313     // machines this requires an extending load from the stack slot to ensure
10314     // that the bits are in the right place.
10315     EVT LoadMemVT =
10316         EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10317 
10318     // Load from the stack slot.
10319     SDValue Load = DAG.getExtLoad(
10320         ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10321         MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10322 
10323     Stores.push_back(
10324         DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
10325                           ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10326                           ST->getOriginalAlign(),
10327                           ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10328     // The order of the stores doesn't matter - say it with a TokenFactor.
10329     SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10330     return Result;
10331   }
10332 
10333   assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10334          "Unaligned store of unknown type.");
10335   // Get the half-size VT
10336   EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10337   unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10338   unsigned IncrementSize = NumBits / 8;
10339 
10340   // Divide the stored value in two parts.
10341   SDValue ShiftAmount =
10342       DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10343   SDValue Lo = Val;
10344   // If Val is a constant, replace the upper bits with 0. The SRL will constant
10345   // fold and not use the upper bits. A smaller constant may be easier to
10346   // materialize.
10347   if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10348     Lo = DAG.getNode(
10349         ISD::AND, dl, VT, Lo,
10350         DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10351                         VT));
10352   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10353 
10354   // Store the two parts
10355   SDValue Store1, Store2;
10356   Store1 = DAG.getTruncStore(Chain, dl,
10357                              DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10358                              Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10359                              ST->getMemOperand()->getFlags());
10360 
10361   Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10362   Store2 = DAG.getTruncStore(
10363       Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10364       ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10365       ST->getMemOperand()->getFlags(), ST->getAAInfo());
10366 
10367   SDValue Result =
10368       DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10369   return Result;
10370 }
10371 
10372 SDValue
10373 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10374                                        const SDLoc &DL, EVT DataVT,
10375                                        SelectionDAG &DAG,
10376                                        bool IsCompressedMemory) const {
10377   SDValue Increment;
10378   EVT AddrVT = Addr.getValueType();
10379   EVT MaskVT = Mask.getValueType();
10380   assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10381          "Incompatible types of Data and Mask");
10382   if (IsCompressedMemory) {
10383     if (DataVT.isScalableVector())
10384       report_fatal_error(
10385           "Cannot currently handle compressed memory with scalable vectors");
10386     // Incrementing the pointer according to number of '1's in the mask.
10387     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10388     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10389     if (MaskIntVT.getSizeInBits() < 32) {
10390       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10391       MaskIntVT = MVT::i32;
10392     }
10393 
10394     // Count '1's with POPCNT.
10395     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10396     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10397     // Scale is an element size in bytes.
10398     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10399                                     AddrVT);
10400     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10401   } else if (DataVT.isScalableVector()) {
10402     Increment = DAG.getVScale(DL, AddrVT,
10403                               APInt(AddrVT.getFixedSizeInBits(),
10404                                     DataVT.getStoreSize().getKnownMinValue()));
10405   } else
10406     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10407 
10408   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10409 }
10410 
// Clamp a dynamic index so that a SubEC-sized access starting at Idx stays
// within the bounds of a VecVT-typed vector.
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    // Otherwise clamp to (vscale * NElts) - NumSubElts, the last in-bounds
    // start position. Use USUBSAT when the fixed subvector could exceed the
    // minimum vector length, so the subtraction cannot wrap below zero.
    SDValue VS =
        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
                              DAG.getConstant(NumSubElts, dl, IdxVT));
    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
  }
  // Single-element access into a power-of-two-sized vector: clamping reduces
  // to masking off the high index bits.
  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }
  // General case: clamp to the last in-bounds start index (0 if the
  // subvector is at least as large as the vector).
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(MaxIndex, dl, IdxVT));
}
10444 
10445 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10446                                                 SDValue VecPtr, EVT VecVT,
10447                                                 SDValue Index) const {
10448   return getVectorSubVecPointer(
10449       DAG, VecPtr, VecVT,
10450       EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10451       Index);
10452 }
10453 
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  // Compute the address of the SubVecVT-sized subvector at element Index of
  // the VecVT vector stored at VecPtr, clamping Index to keep the access in
  // bounds.
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  // For scalable subvectors the element offset scales with vscale.
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  // Byte offset = element index * element size.
  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
10483 
10484 //===----------------------------------------------------------------------===//
10485 // Implementation of Emulated TLS Model
10486 //===----------------------------------------------------------------------===//
10487 
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  // Look through pointer casts and aliases so the "__emutls_v." prefix is
  // applied to the underlying global's name.
  const GlobalValue *GV =
      cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  // NOTE(review): the control variable is presumably created by an earlier
  // emuTLS lowering step; the assert below requires it to already exist.
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Build and lower the runtime call; the call's result is the TLS address.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10527 
10528 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10529                                                 SelectionDAG &DAG) const {
10530   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10531   if (!isCtlzFast())
10532     return SDValue();
10533   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10534   SDLoc dl(Op);
10535   if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10536     EVT VT = Op.getOperand(0).getValueType();
10537     SDValue Zext = Op.getOperand(0);
10538     if (VT.bitsLT(MVT::i32)) {
10539       VT = MVT::i32;
10540       Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10541     }
10542     unsigned Log2b = Log2_32(VT.getSizeInBits());
10543     SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10544     SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10545                               DAG.getConstant(Log2b, dl, MVT::i32));
10546     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10547   }
10548   return SDValue();
10549 }
10550 
/// Expand an integer min/max node (SMIN/SMAX/UMIN/UMAX) into compare+select,
/// preferring cheaper arithmetic idioms when the target's legal operations
/// and boolean representation allow them.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    // Op0 is used twice (setcc and sub), so freeze it to keep both uses
    // observing the same value if it is poison/undef.
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  // PrefCC/AltCC select (Op0, Op1); the commuted variants require the select
  // operands to be swapped since the comparison direction is reversed.
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    // No reusable SETCC found; fall back to the preferred predicate.
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10631 
/// Expand a saturating add/sub node ([SU]ADDSAT/[SU]SUBSAT) in terms of
/// min/max ops or the corresponding overflow-reporting arithmetic node plus
/// a select (or boolean masking when booleans are 0/-1).
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to its overflow-reporting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Booleans are all-ones, so the overflow flag sign-extends to a full
      // mask and OR'ing it in saturates to UINT_MAX directly.
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Same mask trick as above: AND'ing with the inverted mask clamps an
      // underflowed difference to zero.
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // Generic signed saturation: on overflow, the wrapped result's sign bit is
  // the opposite of the true result's sign, so broadcasting it (SRA by BW-1)
  // and XOR'ing with SIGNED_MIN yields SIGNED_MAX for positive overflow and
  // SIGNED_MIN for negative overflow.
  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10747 
10748 SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10749   unsigned Opcode = Node->getOpcode();
10750   SDValue LHS = Node->getOperand(0);
10751   SDValue RHS = Node->getOperand(1);
10752   EVT VT = LHS.getValueType();
10753   EVT ResVT = Node->getValueType(0);
10754   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10755   SDLoc dl(Node);
10756 
10757   auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10758   auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10759   SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10760   SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10761 
10762   // We can't perform arithmetic on i1 values. Extending them would
10763   // probably result in worse codegen, so let's just use two selects instead.
10764   // Some targets are also just better off using selects rather than subtraction
10765   // because one of the conditions can be merged with one of the selects.
10766   // And finally, if we don't know the contents of high bits of a boolean value
10767   // we can't perform any arithmetic either.
10768   if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10769       getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10770     SDValue SelectZeroOrOne =
10771         DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10772                       DAG.getConstant(0, dl, ResVT));
10773     return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10774                          SelectZeroOrOne);
10775   }
10776 
10777   if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10778     std::swap(IsGT, IsLT);
10779   return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10780                             ResVT);
10781 }
10782 
10783 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10784   unsigned Opcode = Node->getOpcode();
10785   bool IsSigned = Opcode == ISD::SSHLSAT;
10786   SDValue LHS = Node->getOperand(0);
10787   SDValue RHS = Node->getOperand(1);
10788   EVT VT = LHS.getValueType();
10789   SDLoc dl(Node);
10790 
10791   assert((Node->getOpcode() == ISD::SSHLSAT ||
10792           Node->getOpcode() == ISD::USHLSAT) &&
10793           "Expected a SHLSAT opcode");
10794   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10795   assert(VT.isInteger() && "Expected operands to be integers");
10796 
10797   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10798     return DAG.UnrollVectorOp(Node);
10799 
10800   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10801 
10802   unsigned BW = VT.getScalarSizeInBits();
10803   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10804   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10805   SDValue Orig =
10806       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10807 
10808   SDValue SatVal;
10809   if (IsSigned) {
10810     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10811     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10812     SDValue Cond =
10813         DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10814     SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10815   } else {
10816     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10817   }
10818   SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10819   return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10820 }
10821 
/// Expand a double-width multiply given the operands already split into
/// low/high halves (LL/LH and RL/RH, each of the half-width type). The
/// low and high halves of the product are returned in Lo and Hi. Uses a
/// MUL libcall of type WideVT when one is available, otherwise expands
/// inline via schoolbook multiplication on half-width pieces.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, EVT WideVT,
                                        const SDValue LL, const SDValue LH,
                                        const SDValue RL, const SDValue RH,
                                        SDValue &Lo, SDValue &Hi) const {
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  // Also, we can fall back to a division in some cases, but that's a big
  // performance hit in the general case.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    // We'll expand the multiplication by brute force because we have no other
    // options. This is a trivially-generalized version of the code from
    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
    // 4.3.1).
    EVT VT = LL.getValueType();
    unsigned Bits = VT.getSizeInBits();
    unsigned HalfBits = Bits >> 1;
    // Split each low half again into quarter-width pieces (masked low part
    // and shifted high part) so every partial product fits in VT.
    SDValue Mask =
        DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
    SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
    SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);

    SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
    SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);

    SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
    SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
    SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
    SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);

    // Accumulate the cross partial products; U and V carry the running sums
    // whose high parts propagate into the final Hi.
    SDValue U = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
    SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
    SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);

    SDValue V = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
    SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);

    SDValue W =
        DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
                    DAG.getNode(ISD::ADD, dl, VT, UH, VH));
    Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
                     DAG.getNode(ISD::SHL, dl, VT, V, Shift));

    // The high result also picks up the cross products with the original
    // high halves (LH, RH); for signed inputs these are the sign-extension
    // words, which makes the schoolbook expansion handle signedness.
    Hi = DAG.getNode(ISD::ADD, dl, VT, W,
                     DAG.getNode(ISD::ADD, dl, VT,
                                 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
                                 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
  } else {
    // Attempt a libcall.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setIsSigned(Signed);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = {LL, LH, RL, RH};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = {LH, LL, RH, RL};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      Lo = Ret.getOperand(0);
      Hi = Ret.getOperand(1);
    } else {
      Lo = Ret.getOperand(1);
      Hi = Ret.getOperand(0);
    }
  }
}
10910 
10911 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10912                                         bool Signed, const SDValue LHS,
10913                                         const SDValue RHS, SDValue &Lo,
10914                                         SDValue &Hi) const {
10915   EVT VT = LHS.getValueType();
10916   assert(RHS.getValueType() == VT && "Mismatching operand types");
10917 
10918   SDValue HiLHS;
10919   SDValue HiRHS;
10920   if (Signed) {
10921     // The high part is obtained by SRA'ing all but one of the bits of low
10922     // part.
10923     unsigned LoSize = VT.getFixedSizeInBits();
10924     HiLHS = DAG.getNode(
10925         ISD::SRA, dl, VT, LHS,
10926         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10927     HiRHS = DAG.getNode(
10928         ISD::SRA, dl, VT, RHS,
10929         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10930   } else {
10931     HiLHS = DAG.getConstant(0, dl, VT);
10932     HiRHS = DAG.getConstant(0, dl, VT);
10933   }
10934   EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10935   forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10936 }
10937 
/// Expand a fixed point multiplication ([SU]MULFIX[SAT]) into a wide multiply
/// producing Lo/Hi halves, a funnel shift by the scale, and (for the
/// saturating forms) clamping based on the discarded high bits. Returns an
/// empty SDValue for vector types with no usable wide multiply.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Scale-0 saturating multiply: use the overflow-reporting multiply and
      // pick the saturation bound from the sign of the true product.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Unsigned case only ever saturates towards the maximum.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    // No legal wide multiply for this vector type; give up and let the
    // caller handle it.
    return SDValue();
  } else {
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits in Hi, compare Hi against the sign-extension
    // of Lo to detect overflow instead of examining shifted bits.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
11094 
/// Expand a fixed point division ([SU]DIVFIX[SAT]) by pre-shifting the
/// operands so a plain integer division yields the correctly scaled result,
/// rounding the signed quotient towards negative infinity. Returns an empty
/// SDValue when the operands lack the headroom to do this in type VT.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Spend as much of the scale on the LHS as its headroom allows; the RHS
  // absorbs the remainder via its trailing zeros.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    // Quotient is negative exactly when the operand signs differ.
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
11180 
11181 void TargetLowering::expandUADDSUBO(
11182     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11183   SDLoc dl(Node);
11184   SDValue LHS = Node->getOperand(0);
11185   SDValue RHS = Node->getOperand(1);
11186   bool IsAdd = Node->getOpcode() == ISD::UADDO;
11187 
11188   // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11189   unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11190   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11191     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11192     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11193                                     { LHS, RHS, CarryIn });
11194     Result = SDValue(NodeCarry.getNode(), 0);
11195     Overflow = SDValue(NodeCarry.getNode(), 1);
11196     return;
11197   }
11198 
11199   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11200                             LHS.getValueType(), LHS, RHS);
11201 
11202   EVT ResultType = Node->getValueType(1);
11203   EVT SetCCType = getSetCCResultType(
11204       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11205   SDValue SetCC;
11206   if (IsAdd && isOneConstant(RHS)) {
11207     // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11208     // the live range of X. We assume comparing with 0 is cheap.
11209     // The general case (X + C) < C is not necessarily beneficial. Although we
11210     // reduce the live range of X, we may introduce the materialization of
11211     // constant C.
11212     SetCC =
11213         DAG.getSetCC(dl, SetCCType, Result,
11214                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11215   } else if (IsAdd && isAllOnesConstant(RHS)) {
11216     // Special case: uaddo X, -1 overflows if X != 0.
11217     SetCC =
11218         DAG.getSetCC(dl, SetCCType, LHS,
11219                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11220   } else {
11221     ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11222     SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11223   }
11224   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11225 }
11226 
11227 void TargetLowering::expandSADDSUBO(
11228     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11229   SDLoc dl(Node);
11230   SDValue LHS = Node->getOperand(0);
11231   SDValue RHS = Node->getOperand(1);
11232   bool IsAdd = Node->getOpcode() == ISD::SADDO;
11233 
11234   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11235                             LHS.getValueType(), LHS, RHS);
11236 
11237   EVT ResultType = Node->getValueType(1);
11238   EVT OType = getSetCCResultType(
11239       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11240 
11241   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11242   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11243   if (isOperationLegal(OpcSat, LHS.getValueType())) {
11244     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11245     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11246     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11247     return;
11248   }
11249 
11250   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11251 
11252   // For an addition, the result should be less than one of the operands (LHS)
11253   // if and only if the other operand (RHS) is negative, otherwise there will
11254   // be overflow.
11255   // For a subtraction, the result should be less than one of the operands
11256   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11257   // otherwise there will be overflow.
11258   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11259   SDValue ConditionRHS =
11260       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11261 
11262   Overflow = DAG.getBoolExtOrTrunc(
11263       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11264       ResultType, ResultType);
11265 }
11266 
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  // Expand [SU]MULO (multiply-with-overflow) into a multiply plus an explicit
  // overflow test. Returns false only when no expansion is possible, i.e. a
  // vector type with no legal high-half/widened multiply form.
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back out fails to reproduce the original LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type used by the widening fallback below.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Opcode table indexed by signedness: { high-half multiply, combined
  // lo/hi multiply, extension opcode for the widening fallback }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    // MULH[US]: plain MUL gives the low half, MULH the high half.
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    // [SU]MUL_LOHI produces both halves at once.
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Extend both operands, multiply at double width, then split the product.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // Scalar last resort: force the full wide-multiply expansion.
    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed: no overflow iff the top half equals the sign-extension of the
    // bottom half, i.e. BottomHalf >>a (BitWidth - 1).
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned: overflow iff any bit landed in the top half.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11346 
11347 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11348   SDLoc dl(Node);
11349   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11350   SDValue Op = Node->getOperand(0);
11351   EVT VT = Op.getValueType();
11352 
11353   if (VT.isScalableVector())
11354     report_fatal_error(
11355         "Expanding reductions for scalable vectors is undefined.");
11356 
11357   // Try to use a shuffle reduction for power of two vectors.
11358   if (VT.isPow2VectorType()) {
11359     while (VT.getVectorNumElements() > 1) {
11360       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11361       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11362         break;
11363 
11364       SDValue Lo, Hi;
11365       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11366       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11367       VT = HalfVT;
11368     }
11369   }
11370 
11371   EVT EltVT = VT.getVectorElementType();
11372   unsigned NumElts = VT.getVectorNumElements();
11373 
11374   SmallVector<SDValue, 8> Ops;
11375   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11376 
11377   SDValue Res = Ops[0];
11378   for (unsigned i = 1; i < NumElts; i++)
11379     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11380 
11381   // Result type may be wider than element type.
11382   if (EltVT != Node->getValueType(0))
11383     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11384   return Res;
11385 }
11386 
11387 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11388   SDLoc dl(Node);
11389   SDValue AccOp = Node->getOperand(0);
11390   SDValue VecOp = Node->getOperand(1);
11391   SDNodeFlags Flags = Node->getFlags();
11392 
11393   EVT VT = VecOp.getValueType();
11394   EVT EltVT = VT.getVectorElementType();
11395 
11396   if (VT.isScalableVector())
11397     report_fatal_error(
11398         "Expanding reductions for scalable vectors is undefined.");
11399 
11400   unsigned NumElts = VT.getVectorNumElements();
11401 
11402   SmallVector<SDValue, 8> Ops;
11403   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11404 
11405   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11406 
11407   SDValue Res = AccOp;
11408   for (unsigned i = 0; i < NumElts; i++)
11409     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11410 
11411   return Res;
11412 }
11413 
11414 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11415                                SelectionDAG &DAG) const {
11416   EVT VT = Node->getValueType(0);
11417   SDLoc dl(Node);
11418   bool isSigned = Node->getOpcode() == ISD::SREM;
11419   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11420   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11421   SDValue Dividend = Node->getOperand(0);
11422   SDValue Divisor = Node->getOperand(1);
11423   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11424     SDVTList VTs = DAG.getVTList(VT, VT);
11425     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11426     return true;
11427   }
11428   if (isOperationLegalOrCustom(DivOpc, VT)) {
11429     // X % Y -> X-X/Y*Y
11430     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11431     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11432     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11433     return true;
11434   }
11435   return false;
11436 }
11437 
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  // Expand FP_TO_[SU]INT_SAT: convert Src to an integer, saturating to the
  // SatVT range instead of producing poison on out-of-range inputs, and
  // mapping NaN to zero.
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Convert the integer bounds to the source FP type, rounding toward zero
  // so that the FP bounds never lie outside the integer range.
  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  // Exact bounds let us clamp in FP before converting; inexact bounds force
  // the compare/select path below.
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
11547 
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  // Narrow Op to ResultVT using round-to-odd, so that a subsequent rounding
  // to an even narrower type cannot suffer from double rounding.
  EVT OperandVT = Op.getValueType();
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT WideIntVT = OperandVT.changeTypeToInteger();
  SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
  // Save the sign bit separately; the rounding below works on |Op| and the
  // sign is re-applied at the end.
  SDValue SignBit =
      DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
                  DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
  // Compute |Op|, via FABS when it is available, otherwise by masking the
  // sign bit off the integer representation.
  SDValue AbsWide;
  if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
    AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  } else {
    SDValue ClearedSign = DAG.getNode(
        ISD::AND, dl, WideIntVT, OpAsInt,
        DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
    AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
  }
  // Round |Op| to the narrow type, then extend it back to compare with the
  // original and detect whether (and in which direction) we rounded.
  SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
  SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       AbsWide.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true when the operands are equal OR either is NaN, which
  // covers both the "exact" and "NaN" cases in one comparison.
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  // Re-apply the saved sign bit: shift it down into the narrow type's sign
  // position and OR it into the integer result.
  int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
  SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
  SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
  SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
  Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
11615 
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  // Expand FP_ROUND for targets without native support. Currently only the
  // *->bf16 case is handled; other targets get back SDValue() and must
  // legalize differently.
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // Operand 1 == 1 means the input is known exactly representable in the
    // result type, so a simple conversion node suffices.
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    // Detect NaN up front (SETUO: unordered compare of Op with itself).
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
    // Round-to-nearest-even on the f32 bits: add 0x7fff plus the lsb of the
    // upper half, so ties round toward the even bf16 value.
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  return SDValue();
}
11669 
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  // Expand VECTOR_SPLICE for scalable vectors by spilling both operands to
  // a stack temporary laid out as CONCAT(V1, V2) and loading the result at
  // the spliced offset.
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  // Imm >= 0 selects leading elements starting at index Imm of V1:V2;
  // Imm < 0 selects the trailing -Imm elements of V1 followed by V2.
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // The temporary holds both vectors back to back.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // sizeof(V1) is scalable, so the offset is expressed via VSCALE.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  // Clamp only when the requested trailing count can exceed the runtime
  // vector length; UMIN against the scalable byte size does the clamping.
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
11743 
11744 SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11745                                               SelectionDAG &DAG) const {
11746   SDLoc DL(Node);
11747   SDValue Vec = Node->getOperand(0);
11748   SDValue Mask = Node->getOperand(1);
11749   SDValue Passthru = Node->getOperand(2);
11750 
11751   EVT VecVT = Vec.getValueType();
11752   EVT ScalarVT = VecVT.getScalarType();
11753   EVT MaskVT = Mask.getValueType();
11754   EVT MaskScalarVT = MaskVT.getScalarType();
11755 
11756   // Needs to be handled by targets that have scalable vector types.
11757   if (VecVT.isScalableVector())
11758     report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11759 
11760   SDValue StackPtr = DAG.CreateStackTemporary(
11761       VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11762   int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11763   MachinePointerInfo PtrInfo =
11764       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11765 
11766   MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11767   SDValue Chain = DAG.getEntryNode();
11768   SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11769 
11770   bool HasPassthru = !Passthru.isUndef();
11771 
11772   // If we have a passthru vector, store it on the stack, overwrite the matching
11773   // positions and then re-write the last element that was potentially
11774   // overwritten even though mask[i] = false.
11775   if (HasPassthru)
11776     Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11777 
11778   SDValue LastWriteVal;
11779   APInt PassthruSplatVal;
11780   bool IsSplatPassthru =
11781       ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11782 
11783   if (IsSplatPassthru) {
11784     // As we do not know which position we wrote to last, we cannot simply
11785     // access that index from the passthru vector. So we first check if passthru
11786     // is a splat vector, to use any element ...
11787     LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11788   } else if (HasPassthru) {
11789     // ... if it is not a splat vector, we need to get the passthru value at
11790     // position = popcount(mask) and re-load it from the stack before it is
11791     // overwritten in the loop below.
11792     EVT PopcountVT = ScalarVT.changeTypeToInteger();
11793     SDValue Popcount = DAG.getNode(
11794         ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11795     Popcount =
11796         DAG.getNode(ISD::ZERO_EXTEND, DL,
11797                     MaskVT.changeVectorElementType(PopcountVT), Popcount);
11798     Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11799     SDValue LastElmtPtr =
11800         getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11801     LastWriteVal = DAG.getLoad(
11802         ScalarVT, DL, Chain, LastElmtPtr,
11803         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11804     Chain = LastWriteVal.getValue(1);
11805   }
11806 
11807   unsigned NumElms = VecVT.getVectorNumElements();
11808   for (unsigned I = 0; I < NumElms; I++) {
11809     SDValue Idx = DAG.getVectorIdxConstant(I, DL);
11810 
11811     SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11812     SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11813     Chain = DAG.getStore(
11814         Chain, DL, ValI, OutPtr,
11815         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11816 
11817     // Get the mask value and add it to the current output position. This
11818     // either increments by 1 if MaskI is true or adds 0 otherwise.
11819     // Freeze in case we have poison/undef mask entries.
11820     SDValue MaskI = DAG.getFreeze(
11821         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11822     MaskI = DAG.getFreeze(MaskI);
11823     MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11824     MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11825     OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11826 
11827     if (HasPassthru && I == NumElms - 1) {
11828       SDValue EndOfVector =
11829           DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11830       SDValue AllLanesSelected =
11831           DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11832       OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11833       OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11834 
11835       // Re-write the last ValI if all lanes were selected. Otherwise,
11836       // overwrite the last write it with the passthru value.
11837       LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11838                                    LastWriteVal, SDNodeFlags::Unpredictable);
11839       Chain = DAG.getStore(
11840           Chain, DL, LastWriteVal, OutPtr,
11841           MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11842     }
11843   }
11844 
11845   return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11846 }
11847 
/// Legalize the condition code of a comparison (SETCC / VP_SETCC /
/// STRICT_FSETCC[S]) whose condition code is not legal for the operand type.
///
/// Strategies are tried in order:
///   1. swap the operands (the swapped condition may be legal),
///   2. invert the condition, recording the inversion in \p NeedInvert for
///      the caller to apply to the final result,
///   3. invert and swap,
///   4. for i1 operands, lower the comparison directly to logical ops,
///   5. expand into two comparisons combined with AND/OR.
///
/// The operands are rewritten in place: if \p RHS and \p CC come back null,
/// \p LHS already holds the final result of type \p VT; otherwise the caller
/// must still emit the (now-legal) comparison "LHS CC RHS".
/// \p Mask / \p EVL, when non-null, select the VP (vector-predicated) form.
/// \p Chain, when non-null, marks a strict-FP comparison (with \p IsSignaling
/// choosing the signaling variant) and is updated with the merged output
/// chain. \returns true if anything was changed.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  // A null EVL means this is an ordinary (non-vector-predicated) comparison.
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try swapping the operands; the mirrored condition may be legal.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // Emit the inverted condition; the caller is responsible for
      // inverting the result because NeedInvert is set.
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Special case: expand i1 comparisons using logical operations.
    if (OpVT == MVT::i1) {
      SDValue Ret;
      switch (CCCode) {
      default:
        llvm_unreachable("Unknown integer setcc!");
      case ISD::SETEQ: // X == Y  -->  ~(X ^ Y)
        Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
                         MVT::i1);
        break;
      case ISD::SETNE: // X != Y  -->  (X ^ Y)
        Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
        break;
      case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
      case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
      case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
      case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
      case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      }

      // The full result is already computed: hand it back through LHS and
      // clear RHS/CC so the caller knows no comparison remains to be done.
      LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
      RHS = SDValue();
      CC = SDValue();
      return true;
    }

    // General case: pick two condition codes CC1/CC2 and a logical opcode
    // Opc so the comparison can be emitted as two setccs combined with Opc.
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
        // unord(X, Y)  -->  X != X | Y != Y   (NaN compares unequal to itself)
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      // Fall through and compute SETO, inverting the result to get SETUO.
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      // ord(X, Y)  -->  X == X & Y == Y
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      // Bit 3 of the cond code distinguishes unordered from ordered variants
      // (see the comment in the SETOEQ group below).
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
                                          isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        // OGT || OLT computes SETONE; invert it for SETUEQ.
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // Keep the relation (low 3 bits) and form the don't-care-about-order
        // integer-style condition code for it.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      // (self-comparisons that each test one operand for NaN).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // Strict-FP setccs produce an output chain; merge the two chains so
    // neither comparison can be dropped or reordered past side effects.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    // Result is fully materialized in LHS; clear RHS/CC to signal that.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
12040 
12041 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12042                                                       SelectionDAG &DAG) const {
12043   EVT VT = Node->getValueType(0);
12044   // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12045   // split into two equal parts.
12046   if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12047     return SDValue();
12048 
12049   // Restrict expansion to cases where both parts can be concatenated.
12050   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12051   if (LoVT != HiVT || !isTypeLegal(LoVT))
12052     return SDValue();
12053 
12054   SDLoc DL(Node);
12055   unsigned Opcode = Node->getOpcode();
12056 
12057   // Don't expand if the result is likely to be unrolled anyway.
12058   if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12059     return SDValue();
12060 
12061   SmallVector<SDValue, 4> LoOps, HiOps;
12062   for (const SDValue &V : Node->op_values()) {
12063     auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12064     LoOps.push_back(Lo);
12065     HiOps.push_back(Hi);
12066   }
12067 
12068   SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12069   SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12070   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12071 }
12072