xref: /llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (revision d839e765f03cf76e3770921d2f8e8bf510136dca)
1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Analysis/ValueTracking.h"
16 #include "llvm/Analysis/VectorUtils.h"
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/CodeGenCommonISel.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineJumpTableInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/TargetRegisterInfo.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/Support/DivisionByConstantInfo.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include <cctype>
37 using namespace llvm;
38 
39 /// NOTE: The TargetMachine owns TLOF.
// All lowering state is held by TargetLoweringBase; this class adds only
// behavior, so there is nothing extra to initialize here.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42 
// Default implementation: this base class knows no target-specific node
// opcodes, so it has no name to offer. Targets override this to name their
// custom ISD nodes.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46 
// Convenience forwarder: PIC-ness is a property of the TargetMachine.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
50 
51 /// Check whether a given call node is in tail position within its function. If
52 /// so, it sets Chain to the input chain of the tail call.
53 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54                                           SDValue &Chain) const {
55   const Function &F = DAG.getMachineFunction().getFunction();
56 
57   // First, check if tail calls have been disabled in this function.
58   if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59     return false;
60 
61   // Conservatively require the attributes of the call to match those of
62   // the return. Ignore following attributes because they don't affect the
63   // call sequence.
64   AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65   for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66                            Attribute::DereferenceableOrNull, Attribute::NoAlias,
67                            Attribute::NonNull, Attribute::NoUndef,
68                            Attribute::Range, Attribute::NoFPClass})
69     CallerAttrs.removeAttribute(Attr);
70 
71   if (CallerAttrs.hasAttributes())
72     return false;
73 
74   // It's not safe to eliminate the sign / zero extension of the return value.
75   if (CallerAttrs.contains(Attribute::ZExt) ||
76       CallerAttrs.contains(Attribute::SExt))
77     return false;
78 
79   // Check if the only use is a function return node.
80   return isUsedByReturnOnly(Node, Chain);
81 }
82 
83 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
84     const uint32_t *CallerPreservedMask,
85     const SmallVectorImpl<CCValAssign> &ArgLocs,
86     const SmallVectorImpl<SDValue> &OutVals) const {
87   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
88     const CCValAssign &ArgLoc = ArgLocs[I];
89     if (!ArgLoc.isRegLoc())
90       continue;
91     MCRegister Reg = ArgLoc.getLocReg();
92     // Only look at callee saved registers.
93     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94       continue;
95     // Check that we pass the value used for the caller.
96     // (We look for a CopyFromReg reading a virtual register that is used
97     //  for the function live-in value of register Reg)
98     SDValue Value = OutVals[I];
99     if (Value->getOpcode() == ISD::AssertZext)
100       Value = Value.getOperand(0);
101     if (Value->getOpcode() != ISD::CopyFromReg)
102       return false;
103     Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105       return false;
106   }
107   return true;
108 }
109 
110 /// Set CallLoweringInfo attribute flags based on a call instruction
111 /// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror the parameter attributes of argument ArgIdx (from the call site
  // or the callee) into the corresponding ArgListEntry flags.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  // An explicit stack alignment on the parameter takes precedence over the
  // byval alignment fallback applied below.
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // The indirect-passing ABI attributes are expected to be mutually
  // exclusive (asserted here); record the pointee type of whichever is set.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // Fall back to the byval parameter alignment when no explicit stack
    // alignment was given.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
143 
144 /// Generate a libcall taking the given operands as arguments and returning a
145 /// result of type RetVT.
146 std::pair<SDValue, SDValue>
147 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
148                             ArrayRef<SDValue> Ops,
149                             MakeLibCallOptions CallOptions,
150                             const SDLoc &dl,
151                             SDValue InChain) const {
152   if (!InChain)
153     InChain = DAG.getEntryNode();
154 
155   TargetLowering::ArgListTy Args;
156   Args.reserve(Ops.size());
157 
158   TargetLowering::ArgListEntry Entry;
159   for (unsigned i = 0; i < Ops.size(); ++i) {
160     SDValue NewOp = Ops[i];
161     Entry.Node = NewOp;
162     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
163     Entry.IsSExt =
164         shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
165     Entry.IsZExt = !Entry.IsSExt;
166 
167     if (CallOptions.IsSoften &&
168         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
169       Entry.IsSExt = Entry.IsZExt = false;
170     }
171     Args.push_back(Entry);
172   }
173 
174   if (LC == RTLIB::UNKNOWN_LIBCALL)
175     report_fatal_error("Unsupported library call operation!");
176   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
177                                          getPointerTy(DAG.getDataLayout()));
178 
179   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
180   TargetLowering::CallLoweringInfo CLI(DAG);
181   bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
182   bool zeroExtend = !signExtend;
183 
184   if (CallOptions.IsSoften &&
185       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
186     signExtend = zeroExtend = false;
187   }
188 
189   CLI.setDebugLoc(dl)
190       .setChain(InChain)
191       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
192       .setNoReturn(CallOptions.DoesNotReturn)
193       .setDiscardResult(!CallOptions.IsReturnValueUsed)
194       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
195       .setSExtResult(signExtend)
196       .setZExtResult(zeroExtend);
197   return LowerCallTo(CLI);
198 }
199 
// Choose the sequence of value types used to expand a memcpy/memset-like
// operation, appending them to MemOps. Returns false if more than Limit
// operations would be needed (or the expansion is otherwise not worthwhile).
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // With a bounded expansion, a memcpy whose source is less aligned than its
  // fixed destination alignment is not worth expanding inline.
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick its preferred type for this operation first.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily cover the remaining bytes with VT-sized operations, shrinking
  // VT whenever it no longer fits in what is left.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the integer types until a safe one is found;
        // i8 is accepted unconditionally as the final fallback.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
289 
290 /// Soften the operands of a comparison. This code is shared among BR_CC,
291 /// SELECT_CC, and SETCC handlers.
292 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
293                                          SDValue &NewLHS, SDValue &NewRHS,
294                                          ISD::CondCode &CCCode,
295                                          const SDLoc &dl, const SDValue OldLHS,
296                                          const SDValue OldRHS) const {
297   SDValue Chain;
298   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
299                              OldRHS, Chain);
300 }
301 
// Soften a floating-point comparison into one or two integer comparison
// libcalls, rewriting NewLHS/NewRHS/CCCode into the equivalent integer setcc
// operands. Chain, if non-null, is threaded through the libcall(s).
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // NOTE(review): IsSignaling is not consulted in this implementation —
  // confirm whether signaling comparisons need distinct handling here.

  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2 is used only for the
  // predicates (SETONE/SETUEQ) that require combining two libcall results;
  // ShouldInvertCC records that the libcall's condition must be inverted.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ, requiring two libcalls combined below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  // The libcall yields an integer that is compared against zero with the
  // condition code associated with the libcall.
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-libcall predicates: materialize both setccs and combine them with
    // AND (inverted case, SETONE) or OR (SETUEQ).
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // Join the chains of both libcalls when a chain is being tracked.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // The combined result is already a boolean; no RHS comparison remains.
    NewRHS = SDValue();
  }
}
441 
442 /// Return the entry encoding for a jump table in the current function. The
443 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
444 unsigned TargetLowering::getJumpTableEncoding() const {
445   // In non-pic modes, just use the address of a block.
446   if (!isPositionIndependent())
447     return MachineJumpTableInfo::EK_BlockAddress;
448 
449   // In PIC mode, if the target supports a GPRel32 directive, use it.
450   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
451     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
452 
453   // Otherwise, use a label difference.
454   return MachineJumpTableInfo::EK_LabelDifference32;
455 }
456 
457 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
458                                                  SelectionDAG &DAG) const {
459   // If our PIC model is GP relative, use the global offset table as the base.
460   unsigned JTEncoding = getJumpTableEncoding();
461 
462   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
463       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
464     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
465 
466   return Table;
467 }
468 
469 /// This returns the relocation base for the given PIC jumptable, the same as
470 /// getPICJumpTableRelocBase, but as an MCExpr.
471 const MCExpr *
472 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
473                                              unsigned JTI,MCContext &Ctx) const{
474   // The normal PIC reloc base is the label at the start of the jump table.
475   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
476 }
477 
478 SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
479                                                SDValue Addr, int JTI,
480                                                SelectionDAG &DAG) const {
481   SDValue Chain = Value;
482   // Jump table debug info is only needed if CodeView is enabled.
483   if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
484     Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
485   }
486   return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
487 }
488 
489 bool
490 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
491   const TargetMachine &TM = getTargetMachine();
492   const GlobalValue *GV = GA->getGlobal();
493 
494   // If the address is not even local to this DSO we will have to load it from
495   // a got and then add the offset.
496   if (!TM.shouldAssumeDSOLocal(GV))
497     return false;
498 
499   // If the code is position independent we will have to add a base register.
500   if (isPositionIndependent())
501     return false;
502 
503   // Otherwise we can do it.
504   return true;
505 }
506 
507 //===----------------------------------------------------------------------===//
508 //  Optimization Methods
509 //===----------------------------------------------------------------------===//
510 
511 /// If the specified instruction has a constant integer operand and there are
512 /// bits set in that constant that are not demanded, then clear those bits and
513 /// return true.
514 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
515                                             const APInt &DemandedBits,
516                                             const APInt &DemandedElts,
517                                             TargetLoweringOpt &TLO) const {
518   SDLoc DL(Op);
519   unsigned Opcode = Op.getOpcode();
520 
521   // Early-out if we've ended up calling an undemanded node, leave this to
522   // constant folding.
523   if (DemandedBits.isZero() || DemandedElts.isZero())
524     return false;
525 
526   // Do target-specific constant optimization.
527   if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
528     return TLO.New.getNode();
529 
530   // FIXME: ISD::SELECT, ISD::SELECT_CC
531   switch (Opcode) {
532   default:
533     break;
534   case ISD::XOR:
535   case ISD::AND:
536   case ISD::OR: {
537     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
538     if (!Op1C || Op1C->isOpaque())
539       return false;
540 
541     // If this is a 'not' op, don't touch it because that's a canonical form.
542     const APInt &C = Op1C->getAPIntValue();
543     if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
544       return false;
545 
546     if (!C.isSubsetOf(DemandedBits)) {
547       EVT VT = Op.getValueType();
548       SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
549       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
550                                       Op->getFlags());
551       return TLO.CombineTo(Op, NewOp);
552     }
553 
554     break;
555   }
556   }
557 
558   return false;
559 }
560 
561 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
562                                             const APInt &DemandedBits,
563                                             TargetLoweringOpt &TLO) const {
564   EVT VT = Op.getValueType();
565   APInt DemandedElts = VT.isVector()
566                            ? APInt::getAllOnes(VT.getVectorNumElements())
567                            : APInt(1, 1);
568   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
569 }
570 
571 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
572 /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
573 /// but it could be generalized for targets with other types of implicit
574 /// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      // Perform the operation at the narrow width, then widen the result
      // with ANY_EXTEND — the bits above DemandedSize are not demanded, so
      // their (undefined) contents after extension do not matter.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
624 
625 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
626                                           DAGCombinerInfo &DCI) const {
627   SelectionDAG &DAG = DCI.DAG;
628   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
629                         !DCI.isBeforeLegalizeOps());
630   KnownBits Known;
631 
632   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
633   if (Simplified) {
634     DCI.AddToWorklist(Op.getNode());
635     DCI.CommitTargetLoweringOpt(TLO);
636   }
637   return Simplified;
638 }
639 
640 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
641                                           const APInt &DemandedElts,
642                                           DAGCombinerInfo &DCI) const {
643   SelectionDAG &DAG = DCI.DAG;
644   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
645                         !DCI.isBeforeLegalizeOps());
646   KnownBits Known;
647 
648   bool Simplified =
649       SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
650   if (Simplified) {
651     DCI.AddToWorklist(Op.getNode());
652     DCI.CommitTargetLoweringOpt(TLO);
653   }
654   return Simplified;
655 }
656 
657 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
658                                           KnownBits &Known,
659                                           TargetLoweringOpt &TLO,
660                                           unsigned Depth,
661                                           bool AssumeSingleUse) const {
662   EVT VT = Op.getValueType();
663 
664   // Since the number of lanes in a scalable vector is unknown at compile time,
665   // we track one bit which is implicitly broadcast to all lanes.  This means
666   // that all lanes in a scalable vector are considered demanded.
667   APInt DemandedElts = VT.isFixedLengthVector()
668                            ? APInt::getAllOnes(VT.getVectorNumElements())
669                            : APInt(1, 1);
670   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
671                               AssumeSingleUse);
672 }
673 
674 // TODO: Under what circumstances can we create nodes? Constant folding?
675 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
676     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
677     SelectionDAG &DAG, unsigned Depth) const {
678   EVT VT = Op.getValueType();
679 
680   // Limit search depth.
681   if (Depth >= SelectionDAG::MaxRecursionDepth)
682     return SDValue();
683 
684   // Ignore UNDEFs.
685   if (Op.isUndef())
686     return SDValue();
687 
688   // Not demanding any bits/elts from Op.
689   if (DemandedBits == 0 || DemandedElts == 0)
690     return DAG.getUNDEF(VT);
691 
692   bool IsLE = DAG.getDataLayout().isLittleEndian();
693   unsigned NumElts = DemandedElts.getBitWidth();
694   unsigned BitWidth = DemandedBits.getBitWidth();
695   KnownBits LHSKnown, RHSKnown;
696   switch (Op.getOpcode()) {
697   case ISD::BITCAST: {
698     if (VT.isScalableVector())
699       return SDValue();
700 
701     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
702     EVT SrcVT = Src.getValueType();
703     EVT DstVT = Op.getValueType();
704     if (SrcVT == DstVT)
705       return Src;
706 
707     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
708     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
709     if (NumSrcEltBits == NumDstEltBits)
710       if (SDValue V = SimplifyMultipleUseDemandedBits(
711               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
712         return DAG.getBitcast(DstVT, V);
713 
714     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
715       unsigned Scale = NumDstEltBits / NumSrcEltBits;
716       unsigned NumSrcElts = SrcVT.getVectorNumElements();
717       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
718       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
719       for (unsigned i = 0; i != Scale; ++i) {
720         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
721         unsigned BitOffset = EltOffset * NumSrcEltBits;
722         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
723         if (!Sub.isZero()) {
724           DemandedSrcBits |= Sub;
725           for (unsigned j = 0; j != NumElts; ++j)
726             if (DemandedElts[j])
727               DemandedSrcElts.setBit((j * Scale) + i);
728         }
729       }
730 
731       if (SDValue V = SimplifyMultipleUseDemandedBits(
732               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
733         return DAG.getBitcast(DstVT, V);
734     }
735 
736     // TODO - bigendian once we have test coverage.
737     if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
738       unsigned Scale = NumSrcEltBits / NumDstEltBits;
739       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
740       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
741       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
742       for (unsigned i = 0; i != NumElts; ++i)
743         if (DemandedElts[i]) {
744           unsigned Offset = (i % Scale) * NumDstEltBits;
745           DemandedSrcBits.insertBits(DemandedBits, Offset);
746           DemandedSrcElts.setBit(i / Scale);
747         }
748 
749       if (SDValue V = SimplifyMultipleUseDemandedBits(
750               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
751         return DAG.getBitcast(DstVT, V);
752     }
753 
754     break;
755   }
756   case ISD::FREEZE: {
757     SDValue N0 = Op.getOperand(0);
758     if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
759                                              /*PoisonOnly=*/false))
760       return N0;
761     break;
762   }
763   case ISD::AND: {
764     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
765     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
766 
767     // If all of the demanded bits are known 1 on one side, return the other.
768     // These bits cannot contribute to the result of the 'and' in this
769     // context.
770     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
771       return Op.getOperand(0);
772     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
773       return Op.getOperand(1);
774     break;
775   }
776   case ISD::OR: {
777     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
778     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
779 
780     // If all of the demanded bits are known zero on one side, return the
781     // other.  These bits cannot contribute to the result of the 'or' in this
782     // context.
783     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
784       return Op.getOperand(0);
785     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
786       return Op.getOperand(1);
787     break;
788   }
789   case ISD::XOR: {
790     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
791     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
792 
793     // If all of the demanded bits are known zero on one side, return the
794     // other.
795     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
796       return Op.getOperand(0);
797     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
798       return Op.getOperand(1);
799     break;
800   }
801   case ISD::ADD: {
802     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
803     if (RHSKnown.isZero())
804       return Op.getOperand(0);
805 
806     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
807     if (LHSKnown.isZero())
808       return Op.getOperand(1);
809     break;
810   }
811   case ISD::SHL: {
812     // If we are only demanding sign bits then we can use the shift source
813     // directly.
814     if (std::optional<uint64_t> MaxSA =
815             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
816       SDValue Op0 = Op.getOperand(0);
817       unsigned ShAmt = *MaxSA;
818       unsigned NumSignBits =
819           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
820       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
821       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
822         return Op0;
823     }
824     break;
825   }
826   case ISD::SRL: {
827     // If we are only demanding sign bits then we can use the shift source
828     // directly.
829     if (std::optional<uint64_t> MaxSA =
830             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
831       SDValue Op0 = Op.getOperand(0);
832       unsigned ShAmt = *MaxSA;
833       // Must already be signbits in DemandedBits bounds, and can't demand any
834       // shifted in zeroes.
835       if (DemandedBits.countl_zero() >= ShAmt) {
836         unsigned NumSignBits =
837             DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
838         if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
839           return Op0;
840       }
841     }
842     break;
843   }
844   case ISD::SETCC: {
845     SDValue Op0 = Op.getOperand(0);
846     SDValue Op1 = Op.getOperand(1);
847     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
848     // If (1) we only need the sign-bit, (2) the setcc operands are the same
849     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
850     // -1, we may be able to bypass the setcc.
851     if (DemandedBits.isSignMask() &&
852         Op0.getScalarValueSizeInBits() == BitWidth &&
853         getBooleanContents(Op0.getValueType()) ==
854             BooleanContent::ZeroOrNegativeOneBooleanContent) {
855       // If we're testing X < 0, then this compare isn't needed - just use X!
856       // FIXME: We're limiting to integer types here, but this should also work
857       // if we don't care about FP signed-zero. The use of SETLT with FP means
858       // that we don't care about NaNs.
859       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
860           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
861         return Op0;
862     }
863     break;
864   }
865   case ISD::SIGN_EXTEND_INREG: {
866     // If none of the extended bits are demanded, eliminate the sextinreg.
867     SDValue Op0 = Op.getOperand(0);
868     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
869     unsigned ExBits = ExVT.getScalarSizeInBits();
870     if (DemandedBits.getActiveBits() <= ExBits &&
871         shouldRemoveRedundantExtend(Op))
872       return Op0;
873     // If the input is already sign extended, just drop the extension.
874     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
875     if (NumSignBits >= (BitWidth - ExBits + 1))
876       return Op0;
877     break;
878   }
879   case ISD::ANY_EXTEND_VECTOR_INREG:
880   case ISD::SIGN_EXTEND_VECTOR_INREG:
881   case ISD::ZERO_EXTEND_VECTOR_INREG: {
882     if (VT.isScalableVector())
883       return SDValue();
884 
885     // If we only want the lowest element and none of extended bits, then we can
886     // return the bitcasted source vector.
887     SDValue Src = Op.getOperand(0);
888     EVT SrcVT = Src.getValueType();
889     EVT DstVT = Op.getValueType();
890     if (IsLE && DemandedElts == 1 &&
891         DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
892         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
893       return DAG.getBitcast(DstVT, Src);
894     }
895     break;
896   }
897   case ISD::INSERT_VECTOR_ELT: {
898     if (VT.isScalableVector())
899       return SDValue();
900 
901     // If we don't demand the inserted element, return the base vector.
902     SDValue Vec = Op.getOperand(0);
903     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
904     EVT VecVT = Vec.getValueType();
905     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
906         !DemandedElts[CIdx->getZExtValue()])
907       return Vec;
908     break;
909   }
910   case ISD::INSERT_SUBVECTOR: {
911     if (VT.isScalableVector())
912       return SDValue();
913 
914     SDValue Vec = Op.getOperand(0);
915     SDValue Sub = Op.getOperand(1);
916     uint64_t Idx = Op.getConstantOperandVal(2);
917     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
918     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
919     // If we don't demand the inserted subvector, return the base vector.
920     if (DemandedSubElts == 0)
921       return Vec;
922     break;
923   }
924   case ISD::VECTOR_SHUFFLE: {
925     assert(!VT.isScalableVector());
926     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
927 
928     // If all the demanded elts are from one operand and are inline,
929     // then we can use the operand directly.
930     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
931     for (unsigned i = 0; i != NumElts; ++i) {
932       int M = ShuffleMask[i];
933       if (M < 0 || !DemandedElts[i])
934         continue;
935       AllUndef = false;
936       IdentityLHS &= (M == (int)i);
937       IdentityRHS &= ((M - NumElts) == i);
938     }
939 
940     if (AllUndef)
941       return DAG.getUNDEF(Op.getValueType());
942     if (IdentityLHS)
943       return Op.getOperand(0);
944     if (IdentityRHS)
945       return Op.getOperand(1);
946     break;
947   }
948   default:
949     // TODO: Probably okay to remove after audit; here to reduce change size
950     // in initial enablement patch for scalable vectors
951     if (VT.isScalableVector())
952       return SDValue();
953 
954     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
955       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
956               Op, DemandedBits, DemandedElts, DAG, Depth))
957         return V;
958     break;
959   }
960   return SDValue();
961 }
962 
963 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
964     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
965     unsigned Depth) const {
966   EVT VT = Op.getValueType();
967   // Since the number of lanes in a scalable vector is unknown at compile time,
968   // we track one bit which is implicitly broadcast to all lanes.  This means
969   // that all lanes in a scalable vector are considered demanded.
970   APInt DemandedElts = VT.isFixedLengthVector()
971                            ? APInt::getAllOnes(VT.getVectorNumElements())
972                            : APInt(1, 1);
973   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
974                                          Depth);
975 }
976 
977 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
978     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
979     unsigned Depth) const {
980   APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
981   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
982                                          Depth);
983 }
984 
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// The shift must be by exactly 1, and the add operands must have enough known
// sign/zero bits that the addition provably does not overflow in a narrower
// type, so the whole sequence can be expressed as an averaging node on the
// (possibly narrower) operands followed by an extend. Returns the replacement
// value, or an empty SDValue if the pattern does not match or is not
// profitable/legal.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  // For the avgceil forms, Add2 records the inner ADD that carries the +1 so
  // its overflow flags can be checked later; it stays null for avgfloor.
  SDValue Add2;
  // Try to match {Op1, Op2, Op3} as {ext, 1, ext} or {ext, ext, 1}, where A is
  // the inner add node. On success, rebinds the captured ExtOpA/ExtOpB to the
  // two extended operands and remembers A in Add2.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // IsCeil is true when a +1 was folded into either add operand (avgceil);
  // otherwise we have the plain two-operand add (avgfloor).
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // Subtract 1: the add of the two operands can consume one bit of headroom.
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Decide between a signed or unsigned average, and record how many high
  // bits (KnownBits) are redundant so the operation can be narrowed.
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    // Prefer the unsigned form when it allows a narrower type.
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    // Prefer the unsigned form when it allows a narrower type.
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // A signed average of an srl is only valid if the sign bit of the result
    // is not demanded (the two differ only in that bit).
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create a AVGFLOOR node with a scalar constant unless its legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  // Build avg(ext/trunc(A), ext/trunc(B)) in the narrow type, then extend (or
  // truncate) the result back to the original type. The extension kind
  // (sign/zero) follows the signedness chosen above.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1126 
1127 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1128 /// result of Op are ever used downstream. If we can use this information to
1129 /// simplify Op, create a new simplified DAG node and return true, returning the
1130 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1131 /// return a mask of Known bits for the expression (used to simplify the
1132 /// caller).  The Known bits may only be accurate for those bits in the
1133 /// OriginalDemandedBits and OriginalDemandedElts.
1134 bool TargetLowering::SimplifyDemandedBits(
1135     SDValue Op, const APInt &OriginalDemandedBits,
1136     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1137     unsigned Depth, bool AssumeSingleUse) const {
1138   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1139   assert(Op.getScalarValueSizeInBits() == BitWidth &&
1140          "Mask size mismatches value type size!");
1141 
1142   // Don't know anything.
1143   Known = KnownBits(BitWidth);
1144 
1145   EVT VT = Op.getValueType();
1146   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1147   unsigned NumElts = OriginalDemandedElts.getBitWidth();
1148   assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1149          "Unexpected vector size");
1150 
1151   APInt DemandedBits = OriginalDemandedBits;
1152   APInt DemandedElts = OriginalDemandedElts;
1153   SDLoc dl(Op);
1154 
1155   // Undef operand.
1156   if (Op.isUndef())
1157     return false;
1158 
1159   // We can't simplify target constants.
1160   if (Op.getOpcode() == ISD::TargetConstant)
1161     return false;
1162 
1163   if (Op.getOpcode() == ISD::Constant) {
1164     // We know all of the bits for a constant!
1165     Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1166     return false;
1167   }
1168 
1169   if (Op.getOpcode() == ISD::ConstantFP) {
1170     // We know all of the bits for a floating point constant!
1171     Known = KnownBits::makeConstant(
1172         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1173     return false;
1174   }
1175 
1176   // Other users may use these bits.
1177   bool HasMultiUse = false;
1178   if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1179     if (Depth >= SelectionDAG::MaxRecursionDepth) {
1180       // Limit search depth.
1181       return false;
1182     }
1183     // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1184     DemandedBits = APInt::getAllOnes(BitWidth);
1185     DemandedElts = APInt::getAllOnes(NumElts);
1186     HasMultiUse = true;
1187   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1188     // Not demanding any bits/elts from Op.
1189     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1190   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1191     // Limit search depth.
1192     return false;
1193   }
1194 
1195   KnownBits Known2;
1196   switch (Op.getOpcode()) {
1197   case ISD::SCALAR_TO_VECTOR: {
1198     if (VT.isScalableVector())
1199       return false;
1200     if (!DemandedElts[0])
1201       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1202 
1203     KnownBits SrcKnown;
1204     SDValue Src = Op.getOperand(0);
1205     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1206     APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1207     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1208       return true;
1209 
1210     // Upper elements are undef, so only get the knownbits if we just demand
1211     // the bottom element.
1212     if (DemandedElts == 1)
1213       Known = SrcKnown.anyextOrTrunc(BitWidth);
1214     break;
1215   }
1216   case ISD::BUILD_VECTOR:
1217     // Collect the known bits that are shared by every demanded element.
1218     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1219     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1220     return false; // Don't fall through, will infinitely loop.
1221   case ISD::SPLAT_VECTOR: {
1222     SDValue Scl = Op.getOperand(0);
1223     APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1224     KnownBits KnownScl;
1225     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1226       return true;
1227 
1228     // Implicitly truncate the bits to match the official semantics of
1229     // SPLAT_VECTOR.
1230     Known = KnownScl.trunc(BitWidth);
1231     break;
1232   }
1233   case ISD::LOAD: {
1234     auto *LD = cast<LoadSDNode>(Op);
1235     if (getTargetConstantFromLoad(LD)) {
1236       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1237       return false; // Don't fall through, will infinitely loop.
1238     }
1239     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1240       // If this is a ZEXTLoad and we are looking at the loaded value.
1241       EVT MemVT = LD->getMemoryVT();
1242       unsigned MemBits = MemVT.getScalarSizeInBits();
1243       Known.Zero.setBitsFrom(MemBits);
1244       return false; // Don't fall through, will infinitely loop.
1245     }
1246     break;
1247   }
1248   case ISD::INSERT_VECTOR_ELT: {
1249     if (VT.isScalableVector())
1250       return false;
1251     SDValue Vec = Op.getOperand(0);
1252     SDValue Scl = Op.getOperand(1);
1253     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1254     EVT VecVT = Vec.getValueType();
1255 
1256     // If index isn't constant, assume we need all vector elements AND the
1257     // inserted element.
1258     APInt DemandedVecElts(DemandedElts);
1259     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1260       unsigned Idx = CIdx->getZExtValue();
1261       DemandedVecElts.clearBit(Idx);
1262 
1263       // Inserted element is not required.
1264       if (!DemandedElts[Idx])
1265         return TLO.CombineTo(Op, Vec);
1266     }
1267 
1268     KnownBits KnownScl;
1269     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1270     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1271     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1272       return true;
1273 
1274     Known = KnownScl.anyextOrTrunc(BitWidth);
1275 
1276     KnownBits KnownVec;
1277     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1278                              Depth + 1))
1279       return true;
1280 
1281     if (!!DemandedVecElts)
1282       Known = Known.intersectWith(KnownVec);
1283 
1284     return false;
1285   }
1286   case ISD::INSERT_SUBVECTOR: {
1287     if (VT.isScalableVector())
1288       return false;
1289     // Demand any elements from the subvector and the remainder from the src its
1290     // inserted into.
1291     SDValue Src = Op.getOperand(0);
1292     SDValue Sub = Op.getOperand(1);
1293     uint64_t Idx = Op.getConstantOperandVal(2);
1294     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1295     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1296     APInt DemandedSrcElts = DemandedElts;
1297     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1298 
1299     KnownBits KnownSub, KnownSrc;
1300     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1301                              Depth + 1))
1302       return true;
1303     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1304                              Depth + 1))
1305       return true;
1306 
1307     Known.Zero.setAllBits();
1308     Known.One.setAllBits();
1309     if (!!DemandedSubElts)
1310       Known = Known.intersectWith(KnownSub);
1311     if (!!DemandedSrcElts)
1312       Known = Known.intersectWith(KnownSrc);
1313 
1314     // Attempt to avoid multi-use src if we don't need anything from it.
1315     if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1316         !DemandedSrcElts.isAllOnes()) {
1317       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1318           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1319       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1320           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1321       if (NewSub || NewSrc) {
1322         NewSub = NewSub ? NewSub : Sub;
1323         NewSrc = NewSrc ? NewSrc : Src;
1324         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1325                                         Op.getOperand(2));
1326         return TLO.CombineTo(Op, NewOp);
1327       }
1328     }
1329     break;
1330   }
1331   case ISD::EXTRACT_SUBVECTOR: {
1332     if (VT.isScalableVector())
1333       return false;
1334     // Offset the demanded elts by the subvector index.
1335     SDValue Src = Op.getOperand(0);
1336     if (Src.getValueType().isScalableVector())
1337       break;
1338     uint64_t Idx = Op.getConstantOperandVal(1);
1339     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1340     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1341 
1342     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1343                              Depth + 1))
1344       return true;
1345 
1346     // Attempt to avoid multi-use src if we don't need anything from it.
1347     if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1348       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1349           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1350       if (DemandedSrc) {
1351         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1352                                         Op.getOperand(1));
1353         return TLO.CombineTo(Op, NewOp);
1354       }
1355     }
1356     break;
1357   }
1358   case ISD::CONCAT_VECTORS: {
1359     if (VT.isScalableVector())
1360       return false;
1361     Known.Zero.setAllBits();
1362     Known.One.setAllBits();
1363     EVT SubVT = Op.getOperand(0).getValueType();
1364     unsigned NumSubVecs = Op.getNumOperands();
1365     unsigned NumSubElts = SubVT.getVectorNumElements();
1366     for (unsigned i = 0; i != NumSubVecs; ++i) {
1367       APInt DemandedSubElts =
1368           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1369       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1370                                Known2, TLO, Depth + 1))
1371         return true;
1372       // Known bits are shared by every demanded subvector element.
1373       if (!!DemandedSubElts)
1374         Known = Known.intersectWith(Known2);
1375     }
1376     break;
1377   }
1378   case ISD::VECTOR_SHUFFLE: {
1379     assert(!VT.isScalableVector());
1380     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1381 
1382     // Collect demanded elements from shuffle operands..
1383     APInt DemandedLHS, DemandedRHS;
1384     if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1385                                 DemandedRHS))
1386       break;
1387 
1388     if (!!DemandedLHS || !!DemandedRHS) {
1389       SDValue Op0 = Op.getOperand(0);
1390       SDValue Op1 = Op.getOperand(1);
1391 
1392       Known.Zero.setAllBits();
1393       Known.One.setAllBits();
1394       if (!!DemandedLHS) {
1395         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1396                                  Depth + 1))
1397           return true;
1398         Known = Known.intersectWith(Known2);
1399       }
1400       if (!!DemandedRHS) {
1401         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1402                                  Depth + 1))
1403           return true;
1404         Known = Known.intersectWith(Known2);
1405       }
1406 
1407       // Attempt to avoid multi-use ops if we don't need anything from them.
1408       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1409           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1410       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1411           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1412       if (DemandedOp0 || DemandedOp1) {
1413         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1414         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1415         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1416         return TLO.CombineTo(Op, NewOp);
1417       }
1418     }
1419     break;
1420   }
1421   case ISD::AND: {
1422     SDValue Op0 = Op.getOperand(0);
1423     SDValue Op1 = Op.getOperand(1);
1424 
1425     // If the RHS is a constant, check to see if the LHS would be zero without
1426     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1427     // simplify the LHS, here we're using information from the LHS to simplify
1428     // the RHS.
1429     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1430       // Do not increment Depth here; that can cause an infinite loop.
1431       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1432       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1433       if ((LHSKnown.Zero & DemandedBits) ==
1434           (~RHSC->getAPIntValue() & DemandedBits))
1435         return TLO.CombineTo(Op, Op0);
1436 
1437       // If any of the set bits in the RHS are known zero on the LHS, shrink
1438       // the constant.
1439       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1440                                  DemandedElts, TLO))
1441         return true;
1442 
1443       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1444       // constant, but if this 'and' is only clearing bits that were just set by
1445       // the xor, then this 'and' can be eliminated by shrinking the mask of
1446       // the xor. For example, for a 32-bit X:
1447       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1448       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1449           LHSKnown.One == ~RHSC->getAPIntValue()) {
1450         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1451         return TLO.CombineTo(Op, Xor);
1452       }
1453     }
1454 
1455     // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1456     // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1457     if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1458         (Op0.getOperand(0).isUndef() ||
1459          ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1460         Op0->hasOneUse()) {
1461       unsigned NumSubElts =
1462           Op0.getOperand(1).getValueType().getVectorNumElements();
1463       unsigned SubIdx = Op0.getConstantOperandVal(2);
1464       APInt DemandedSub =
1465           APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1466       KnownBits KnownSubMask =
1467           TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1468       if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1469         SDValue NewAnd =
1470             TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1471         SDValue NewInsert =
1472             TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1473                             Op0.getOperand(1), Op0.getOperand(2));
1474         return TLO.CombineTo(Op, NewInsert);
1475       }
1476     }
1477 
1478     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1479                              Depth + 1))
1480       return true;
1481     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1482                              Known2, TLO, Depth + 1))
1483       return true;
1484 
1485     // If all of the demanded bits are known one on one side, return the other.
1486     // These bits cannot contribute to the result of the 'and'.
1487     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1488       return TLO.CombineTo(Op, Op0);
1489     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1490       return TLO.CombineTo(Op, Op1);
1491     // If all of the demanded bits in the inputs are known zeros, return zero.
1492     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1493       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1494     // If the RHS is a constant, see if we can simplify it.
1495     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1496                                TLO))
1497       return true;
1498     // If the operation can be done in a smaller type, do so.
1499     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1500       return true;
1501 
1502     // Attempt to avoid multi-use ops if we don't need anything from them.
1503     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1504       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1505           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1506       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1507           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1508       if (DemandedOp0 || DemandedOp1) {
1509         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1511         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1512         return TLO.CombineTo(Op, NewOp);
1513       }
1514     }
1515 
1516     Known &= Known2;
1517     break;
1518   }
1519   case ISD::OR: {
1520     SDValue Op0 = Op.getOperand(0);
1521     SDValue Op1 = Op.getOperand(1);
1522     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1523                              Depth + 1)) {
1524       Op->dropFlags(SDNodeFlags::Disjoint);
1525       return true;
1526     }
1527 
1528     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1529                              Known2, TLO, Depth + 1)) {
1530       Op->dropFlags(SDNodeFlags::Disjoint);
1531       return true;
1532     }
1533 
1534     // If all of the demanded bits are known zero on one side, return the other.
1535     // These bits cannot contribute to the result of the 'or'.
1536     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1537       return TLO.CombineTo(Op, Op0);
1538     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1539       return TLO.CombineTo(Op, Op1);
1540     // If the RHS is a constant, see if we can simplify it.
1541     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1542       return true;
1543     // If the operation can be done in a smaller type, do so.
1544     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1545       return true;
1546 
1547     // Attempt to avoid multi-use ops if we don't need anything from them.
1548     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1549       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1550           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1551       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1552           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1553       if (DemandedOp0 || DemandedOp1) {
1554         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1555         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1556         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1557         return TLO.CombineTo(Op, NewOp);
1558       }
1559     }
1560 
1561     // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1562     // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1563     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1564         Op0->hasOneUse() && Op1->hasOneUse()) {
1565       // Attempt to match all commutations - m_c_Or would've been useful!
1566       for (int I = 0; I != 2; ++I) {
1567         SDValue X = Op.getOperand(I).getOperand(0);
1568         SDValue C1 = Op.getOperand(I).getOperand(1);
1569         SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1570         SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1571         if (Alt.getOpcode() == ISD::OR) {
1572           for (int J = 0; J != 2; ++J) {
1573             if (X == Alt.getOperand(J)) {
1574               SDValue Y = Alt.getOperand(1 - J);
1575               if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1576                                                                {C1, C2})) {
1577                 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1578                 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1579                 return TLO.CombineTo(
1580                     Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1581               }
1582             }
1583           }
1584         }
1585       }
1586     }
1587 
1588     Known |= Known2;
1589     break;
1590   }
1591   case ISD::XOR: {
1592     SDValue Op0 = Op.getOperand(0);
1593     SDValue Op1 = Op.getOperand(1);
1594 
1595     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1596                              Depth + 1))
1597       return true;
1598     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1599                              Depth + 1))
1600       return true;
1601 
1602     // If all of the demanded bits are known zero on one side, return the other.
1603     // These bits cannot contribute to the result of the 'xor'.
1604     if (DemandedBits.isSubsetOf(Known.Zero))
1605       return TLO.CombineTo(Op, Op0);
1606     if (DemandedBits.isSubsetOf(Known2.Zero))
1607       return TLO.CombineTo(Op, Op1);
1608     // If the operation can be done in a smaller type, do so.
1609     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1610       return true;
1611 
1612     // If all of the unknown bits are known to be zero on one side or the other
1613     // turn this into an *inclusive* or.
1614     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1615     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1616       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1617 
1618     ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1619     if (C) {
1620       // If one side is a constant, and all of the set bits in the constant are
1621       // also known set on the other side, turn this into an AND, as we know
1622       // the bits will be cleared.
1623       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1624       // NB: it is okay if more bits are known than are requested
1625       if (C->getAPIntValue() == Known2.One) {
1626         SDValue ANDC =
1627             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1628         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1629       }
1630 
1631       // If the RHS is a constant, see if we can change it. Don't alter a -1
1632       // constant because that's a 'not' op, and that is better for combining
1633       // and codegen.
1634       if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1635         // We're flipping all demanded bits. Flip the undemanded bits too.
1636         SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1637         return TLO.CombineTo(Op, New);
1638       }
1639 
1640       unsigned Op0Opcode = Op0.getOpcode();
1641       if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1642         if (ConstantSDNode *ShiftC =
1643                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1644           // Don't crash on an oversized shift. We can not guarantee that a
1645           // bogus shift has been simplified to undef.
1646           if (ShiftC->getAPIntValue().ult(BitWidth)) {
1647             uint64_t ShiftAmt = ShiftC->getZExtValue();
1648             APInt Ones = APInt::getAllOnes(BitWidth);
1649             Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1650                                          : Ones.lshr(ShiftAmt);
1651             if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1652                 isDesirableToCommuteXorWithShift(Op.getNode())) {
1653               // If the xor constant is a demanded mask, do a 'not' before the
1654               // shift:
1655               // xor (X << ShiftC), XorC --> (not X) << ShiftC
1656               // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1657               SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1658               return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1659                                                        Op0.getOperand(1)));
1660             }
1661           }
1662         }
1663       }
1664     }
1665 
1666     // If we can't turn this into a 'not', try to shrink the constant.
1667     if (!C || !C->isAllOnes())
1668       if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1669         return true;
1670 
1671     // Attempt to avoid multi-use ops if we don't need anything from them.
1672     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1673       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1674           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1675       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1676           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1677       if (DemandedOp0 || DemandedOp1) {
1678         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1679         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1680         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1681         return TLO.CombineTo(Op, NewOp);
1682       }
1683     }
1684 
1685     Known ^= Known2;
1686     break;
1687   }
1688   case ISD::SELECT:
1689     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1690                              Known, TLO, Depth + 1))
1691       return true;
1692     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1693                              Known2, TLO, Depth + 1))
1694       return true;
1695 
1696     // If the operands are constants, see if we can simplify them.
1697     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1698       return true;
1699 
1700     // Only known if known in both the LHS and RHS.
1701     Known = Known.intersectWith(Known2);
1702     break;
1703   case ISD::VSELECT:
1704     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1705                              Known, TLO, Depth + 1))
1706       return true;
1707     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1708                              Known2, TLO, Depth + 1))
1709       return true;
1710 
1711     // Only known if known in both the LHS and RHS.
1712     Known = Known.intersectWith(Known2);
1713     break;
1714   case ISD::SELECT_CC:
1715     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1716                              Known, TLO, Depth + 1))
1717       return true;
1718     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1719                              Known2, TLO, Depth + 1))
1720       return true;
1721 
1722     // If the operands are constants, see if we can simplify them.
1723     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1724       return true;
1725 
1726     // Only known if known in both the LHS and RHS.
1727     Known = Known.intersectWith(Known2);
1728     break;
1729   case ISD::SETCC: {
1730     SDValue Op0 = Op.getOperand(0);
1731     SDValue Op1 = Op.getOperand(1);
1732     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1733     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1734     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1735     // -1, we may be able to bypass the setcc.
1736     if (DemandedBits.isSignMask() &&
1737         Op0.getScalarValueSizeInBits() == BitWidth &&
1738         getBooleanContents(Op0.getValueType()) ==
1739             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1740       // If we're testing X < 0, then this compare isn't needed - just use X!
1741       // FIXME: We're limiting to integer types here, but this should also work
1742       // if we don't care about FP signed-zero. The use of SETLT with FP means
1743       // that we don't care about NaNs.
1744       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1745           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1746         return TLO.CombineTo(Op, Op0);
1747 
1748       // TODO: Should we check for other forms of sign-bit comparisons?
1749       // Examples: X <= -1, X >= 0
1750     }
1751     if (getBooleanContents(Op0.getValueType()) ==
1752             TargetLowering::ZeroOrOneBooleanContent &&
1753         BitWidth > 1)
1754       Known.Zero.setBitsFrom(1);
1755     break;
1756   }
1757   case ISD::SHL: {
1758     SDValue Op0 = Op.getOperand(0);
1759     SDValue Op1 = Op.getOperand(1);
1760     EVT ShiftVT = Op1.getValueType();
1761 
1762     if (std::optional<uint64_t> KnownSA =
1763             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1764       unsigned ShAmt = *KnownSA;
1765       if (ShAmt == 0)
1766         return TLO.CombineTo(Op, Op0);
1767 
1768       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1769       // single shift.  We can do this if the bottom bits (which are shifted
1770       // out) are never demanded.
1771       // TODO - support non-uniform vector amounts.
1772       if (Op0.getOpcode() == ISD::SRL) {
1773         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1774           if (std::optional<uint64_t> InnerSA =
1775                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1776             unsigned C1 = *InnerSA;
1777             unsigned Opc = ISD::SHL;
1778             int Diff = ShAmt - C1;
1779             if (Diff < 0) {
1780               Diff = -Diff;
1781               Opc = ISD::SRL;
1782             }
1783             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1784             return TLO.CombineTo(
1785                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1786           }
1787         }
1788       }
1789 
1790       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1791       // are not demanded. This will likely allow the anyext to be folded away.
1792       // TODO - support non-uniform vector amounts.
1793       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1794         SDValue InnerOp = Op0.getOperand(0);
1795         EVT InnerVT = InnerOp.getValueType();
1796         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1797         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1798             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1799           SDValue NarrowShl = TLO.DAG.getNode(
1800               ISD::SHL, dl, InnerVT, InnerOp,
1801               TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1802           return TLO.CombineTo(
1803               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1804         }
1805 
1806         // Repeat the SHL optimization above in cases where an extension
1807         // intervenes: (shl (anyext (shr x, c1)), c2) to
1808         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1809         // aren't demanded (as above) and that the shifted upper c1 bits of
1810         // x aren't demanded.
1811         // TODO - support non-uniform vector amounts.
1812         if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1813             InnerOp.hasOneUse()) {
1814           if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1815                   InnerOp, DemandedElts, Depth + 2)) {
1816             unsigned InnerShAmt = *SA2;
1817             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1818                 DemandedBits.getActiveBits() <=
1819                     (InnerBits - InnerShAmt + ShAmt) &&
1820                 DemandedBits.countr_zero() >= ShAmt) {
1821               SDValue NewSA =
1822                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1823               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1824                                                InnerOp.getOperand(0));
1825               return TLO.CombineTo(
1826                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1827             }
1828           }
1829         }
1830       }
1831 
1832       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1833       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1834                                Depth + 1)) {
1835         // Disable the nsw and nuw flags. We can no longer guarantee that we
1836         // won't wrap after simplification.
1837         Op->dropFlags(SDNodeFlags::NoWrap);
1838         return true;
1839       }
1840       Known.Zero <<= ShAmt;
1841       Known.One <<= ShAmt;
1842       // low bits known zero.
1843       Known.Zero.setLowBits(ShAmt);
1844 
1845       // Attempt to avoid multi-use ops if we don't need anything from them.
1846       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1847         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1848             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1849         if (DemandedOp0) {
1850           SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1851           return TLO.CombineTo(Op, NewOp);
1852         }
1853       }
1854 
1855       // TODO: Can we merge this fold with the one below?
1856       // Try shrinking the operation as long as the shift amount will still be
1857       // in range.
1858       if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1859           Op.getNode()->hasOneUse()) {
1860         // Search for the smallest integer type with free casts to and from
1861         // Op's type. For expedience, just check power-of-2 integer types.
1862         unsigned DemandedSize = DemandedBits.getActiveBits();
1863         for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1864              SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1865           EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1866           if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1867               isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1868               isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1869               (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1870             assert(DemandedSize <= SmallVTBits &&
1871                    "Narrowed below demanded bits?");
1872             // We found a type with free casts.
1873             SDValue NarrowShl = TLO.DAG.getNode(
1874                 ISD::SHL, dl, SmallVT,
1875                 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1876                 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1877             return TLO.CombineTo(
1878                 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1879           }
1880         }
1881       }
1882 
1883       // Narrow shift to lower half - similar to ShrinkDemandedOp.
1884       // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1885       // Only do this if we demand the upper half so the knownbits are correct.
1886       unsigned HalfWidth = BitWidth / 2;
1887       if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1888           DemandedBits.countLeadingOnes() >= HalfWidth) {
1889         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1890         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1891             isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1892             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1893             (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1894           // If we're demanding the upper bits at all, we must ensure
1895           // that the upper bits of the shift result are known to be zero,
1896           // which is equivalent to the narrow shift being NUW.
1897           if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1898             bool IsNSW = Known.countMinSignBits() > HalfWidth;
1899             SDNodeFlags Flags;
1900             Flags.setNoSignedWrap(IsNSW);
1901             Flags.setNoUnsignedWrap(IsNUW);
1902             SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1903             SDValue NewShiftAmt =
1904                 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1905             SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1906                                                NewShiftAmt, Flags);
1907             SDValue NewExt =
1908                 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1909             return TLO.CombineTo(Op, NewExt);
1910           }
1911         }
1912       }
1913     } else {
1914       // This is a variable shift, so we can't shift the demand mask by a known
1915       // amount. But if we are not demanding high bits, then we are not
1916       // demanding those bits from the pre-shifted operand either.
1917       if (unsigned CTLZ = DemandedBits.countl_zero()) {
1918         APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1919         if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1920                                  Depth + 1)) {
1921           // Disable the nsw and nuw flags. We can no longer guarantee that we
1922           // won't wrap after simplification.
1923           Op->dropFlags(SDNodeFlags::NoWrap);
1924           return true;
1925         }
1926         Known.resetAll();
1927       }
1928     }
1929 
1930     // If we are only demanding sign bits then we can use the shift source
1931     // directly.
1932     if (std::optional<uint64_t> MaxSA =
1933             TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1934       unsigned ShAmt = *MaxSA;
1935       unsigned NumSignBits =
1936           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1937       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1938       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1939         return TLO.CombineTo(Op, Op0);
1940     }
1941     break;
1942   }
1943   case ISD::SRL: {
1944     SDValue Op0 = Op.getOperand(0);
1945     SDValue Op1 = Op.getOperand(1);
1946     EVT ShiftVT = Op1.getValueType();
1947 
1948     if (std::optional<uint64_t> KnownSA =
1949             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1950       unsigned ShAmt = *KnownSA;
1951       if (ShAmt == 0)
1952         return TLO.CombineTo(Op, Op0);
1953 
1954       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1955       // single shift.  We can do this if the top bits (which are shifted out)
1956       // are never demanded.
1957       // TODO - support non-uniform vector amounts.
1958       if (Op0.getOpcode() == ISD::SHL) {
1959         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1960           if (std::optional<uint64_t> InnerSA =
1961                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1962             unsigned C1 = *InnerSA;
1963             unsigned Opc = ISD::SRL;
1964             int Diff = ShAmt - C1;
1965             if (Diff < 0) {
1966               Diff = -Diff;
1967               Opc = ISD::SHL;
1968             }
1969             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1970             return TLO.CombineTo(
1971                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1972           }
1973         }
1974       }
1975 
1976       // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1977       // single sra. We can do this if the top bits are never demanded.
1978       if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1979         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1980           if (std::optional<uint64_t> InnerSA =
1981                   TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1982             unsigned C1 = *InnerSA;
1983             // Clamp the combined shift amount if it exceeds the bit width.
1984             unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1985             SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1986             return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1987                                                      Op0.getOperand(0), NewSA));
1988           }
1989         }
1990       }
1991 
1992       APInt InDemandedMask = (DemandedBits << ShAmt);
1993 
1994       // If the shift is exact, then it does demand the low bits (and knows that
1995       // they are zero).
1996       if (Op->getFlags().hasExact())
1997         InDemandedMask.setLowBits(ShAmt);
1998 
1999       // Narrow shift to lower half - similar to ShrinkDemandedOp.
2000       // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2001       if ((BitWidth % 2) == 0 && !VT.isVector()) {
2002         APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2003         EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2004         if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2005             isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2006             isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2007             (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2008             ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2009              TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2010           SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2011           SDValue NewShiftAmt =
2012               TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2013           SDValue NewShift =
2014               TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2015           return TLO.CombineTo(
2016               Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2017         }
2018       }
2019 
2020       // Compute the new bits that are at the top now.
2021       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2022                                Depth + 1))
2023         return true;
2024       Known.Zero.lshrInPlace(ShAmt);
2025       Known.One.lshrInPlace(ShAmt);
2026       // High bits known zero.
2027       Known.Zero.setHighBits(ShAmt);
2028 
2029       // Attempt to avoid multi-use ops if we don't need anything from them.
2030       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2031         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2032             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2033         if (DemandedOp0) {
2034           SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2035           return TLO.CombineTo(Op, NewOp);
2036         }
2037       }
2038     } else {
2039       // Use generic knownbits computation as it has support for non-uniform
2040       // shift amounts.
2041       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2042     }
2043 
2044     // If we are only demanding sign bits then we can use the shift source
2045     // directly.
2046     if (std::optional<uint64_t> MaxSA =
2047             TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2048       unsigned ShAmt = *MaxSA;
2049       // Must already be signbits in DemandedBits bounds, and can't demand any
2050       // shifted in zeroes.
2051       if (DemandedBits.countl_zero() >= ShAmt) {
2052         unsigned NumSignBits =
2053             TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2054         if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2055           return TLO.CombineTo(Op, Op0);
2056       }
2057     }
2058 
2059     // Try to match AVG patterns (after shift simplification).
2060     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2061                                         DemandedElts, Depth + 1))
2062       return TLO.CombineTo(Op, AVG);
2063 
2064     break;
2065   }
2066   case ISD::SRA: {
2067     SDValue Op0 = Op.getOperand(0);
2068     SDValue Op1 = Op.getOperand(1);
2069     EVT ShiftVT = Op1.getValueType();
2070 
2071     // If we only want bits that already match the signbit then we don't need
2072     // to shift.
2073     unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2074     if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2075         NumHiDemandedBits)
2076       return TLO.CombineTo(Op, Op0);
2077 
2078     // If this is an arithmetic shift right and only the low-bit is set, we can
2079     // always convert this into a logical shr, even if the shift amount is
2080     // variable.  The low bit of the shift cannot be an input sign bit unless
2081     // the shift amount is >= the size of the datatype, which is undefined.
2082     if (DemandedBits.isOne())
2083       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2084 
2085     if (std::optional<uint64_t> KnownSA =
2086             TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2087       unsigned ShAmt = *KnownSA;
2088       if (ShAmt == 0)
2089         return TLO.CombineTo(Op, Op0);
2090 
2091       // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2092       // supports sext_inreg.
2093       if (Op0.getOpcode() == ISD::SHL) {
2094         if (std::optional<uint64_t> InnerSA =
2095                 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2096           unsigned LowBits = BitWidth - ShAmt;
2097           EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2098           if (VT.isVector())
2099             ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2100                                      VT.getVectorElementCount());
2101 
2102           if (*InnerSA == ShAmt) {
2103             if (!TLO.LegalOperations() ||
2104                 getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
2105               return TLO.CombineTo(
2106                   Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2107                                       Op0.getOperand(0),
2108                                       TLO.DAG.getValueType(ExtVT)));
2109 
2110             // Even if we can't convert to sext_inreg, we might be able to
2111             // remove this shift pair if the input is already sign extended.
2112             unsigned NumSignBits =
2113                 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2114             if (NumSignBits > ShAmt)
2115               return TLO.CombineTo(Op, Op0.getOperand(0));
2116           }
2117         }
2118       }
2119 
2120       APInt InDemandedMask = (DemandedBits << ShAmt);
2121 
2122       // If the shift is exact, then it does demand the low bits (and knows that
2123       // they are zero).
2124       if (Op->getFlags().hasExact())
2125         InDemandedMask.setLowBits(ShAmt);
2126 
2127       // If any of the demanded bits are produced by the sign extension, we also
2128       // demand the input sign bit.
2129       if (DemandedBits.countl_zero() < ShAmt)
2130         InDemandedMask.setSignBit();
2131 
2132       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2133                                Depth + 1))
2134         return true;
2135       Known.Zero.lshrInPlace(ShAmt);
2136       Known.One.lshrInPlace(ShAmt);
2137 
2138       // If the input sign bit is known to be zero, or if none of the top bits
2139       // are demanded, turn this into an unsigned shift right.
2140       if (Known.Zero[BitWidth - ShAmt - 1] ||
2141           DemandedBits.countl_zero() >= ShAmt) {
2142         SDNodeFlags Flags;
2143         Flags.setExact(Op->getFlags().hasExact());
2144         return TLO.CombineTo(
2145             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2146       }
2147 
2148       int Log2 = DemandedBits.exactLogBase2();
2149       if (Log2 >= 0) {
2150         // The bit must come from the sign.
2151         SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2152         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2153       }
2154 
2155       if (Known.One[BitWidth - ShAmt - 1])
2156         // New bits are known one.
2157         Known.One.setHighBits(ShAmt);
2158 
2159       // Attempt to avoid multi-use ops if we don't need anything from them.
2160       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2161         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2162             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2163         if (DemandedOp0) {
2164           SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2165           return TLO.CombineTo(Op, NewOp);
2166         }
2167       }
2168     }
2169 
2170     // Try to match AVG patterns (after shift simplification).
2171     if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2172                                         DemandedElts, Depth + 1))
2173       return TLO.CombineTo(Op, AVG);
2174 
2175     break;
2176   }
2177   case ISD::FSHL:
2178   case ISD::FSHR: {
2179     SDValue Op0 = Op.getOperand(0);
2180     SDValue Op1 = Op.getOperand(1);
2181     SDValue Op2 = Op.getOperand(2);
2182     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2183 
2184     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2185       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2186 
2187       // For fshl, 0-shift returns the 1st arg.
2188       // For fshr, 0-shift returns the 2nd arg.
2189       if (Amt == 0) {
2190         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2191                                  Known, TLO, Depth + 1))
2192           return true;
2193         break;
2194       }
2195 
2196       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2197       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2198       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2199       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2200       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2201                                Depth + 1))
2202         return true;
2203       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2204                                Depth + 1))
2205         return true;
2206 
2207       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2208       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2209       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2210       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2211       Known = Known.unionWith(Known2);
2212 
2213       // Attempt to avoid multi-use ops if we don't need anything from them.
2214       if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2215           !DemandedElts.isAllOnes()) {
2216         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2217             Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2218         SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2219             Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2220         if (DemandedOp0 || DemandedOp1) {
2221           DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2222           DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2223           SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2224                                           DemandedOp1, Op2);
2225           return TLO.CombineTo(Op, NewOp);
2226         }
2227       }
2228     }
2229 
2230     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2231     if (isPowerOf2_32(BitWidth)) {
2232       APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2233       if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2234                                Known2, TLO, Depth + 1))
2235         return true;
2236     }
2237     break;
2238   }
2239   case ISD::ROTL:
2240   case ISD::ROTR: {
2241     SDValue Op0 = Op.getOperand(0);
2242     SDValue Op1 = Op.getOperand(1);
2243     bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2244 
2245     // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2246     if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2247       return TLO.CombineTo(Op, Op0);
2248 
2249     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2250       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2251       unsigned RevAmt = BitWidth - Amt;
2252 
2253       // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2254       // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2255       APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2256       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2257                                Depth + 1))
2258         return true;
2259 
2260       // rot*(x, 0) --> x
2261       if (Amt == 0)
2262         return TLO.CombineTo(Op, Op0);
2263 
2264       // See if we don't demand either half of the rotated bits.
2265       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2266           DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2267         Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2268         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2269       }
2270       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2271           DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2272         Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2273         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2274       }
2275     }
2276 
2277     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2278     if (isPowerOf2_32(BitWidth)) {
2279       APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2280       if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2281                                Depth + 1))
2282         return true;
2283     }
2284     break;
2285   }
2286   case ISD::SMIN:
2287   case ISD::SMAX:
2288   case ISD::UMIN:
2289   case ISD::UMAX: {
2290     unsigned Opc = Op.getOpcode();
2291     SDValue Op0 = Op.getOperand(0);
2292     SDValue Op1 = Op.getOperand(1);
2293 
2294     // If we're only demanding signbits, then we can simplify to OR/AND node.
2295     unsigned BitOp =
2296         (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2297     unsigned NumSignBits =
2298         std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2299                  TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2300     unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2301     if (NumSignBits >= NumDemandedUpperBits)
2302       return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2303 
2304     // Check if one arg is always less/greater than (or equal) to the other arg.
2305     KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2306     KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2307     switch (Opc) {
2308     case ISD::SMIN:
2309       if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2310         return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2311       if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2312         return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2313       Known = KnownBits::smin(Known0, Known1);
2314       break;
2315     case ISD::SMAX:
2316       if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2317         return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2318       if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2319         return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2320       Known = KnownBits::smax(Known0, Known1);
2321       break;
2322     case ISD::UMIN:
2323       if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2324         return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2325       if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2326         return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2327       Known = KnownBits::umin(Known0, Known1);
2328       break;
2329     case ISD::UMAX:
2330       if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2331         return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2332       if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2333         return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2334       Known = KnownBits::umax(Known0, Known1);
2335       break;
2336     }
2337     break;
2338   }
2339   case ISD::BITREVERSE: {
2340     SDValue Src = Op.getOperand(0);
2341     APInt DemandedSrcBits = DemandedBits.reverseBits();
2342     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2343                              Depth + 1))
2344       return true;
2345     Known.One = Known2.One.reverseBits();
2346     Known.Zero = Known2.Zero.reverseBits();
2347     break;
2348   }
2349   case ISD::BSWAP: {
2350     SDValue Src = Op.getOperand(0);
2351 
2352     // If the only bits demanded come from one byte of the bswap result,
2353     // just shift the input byte into position to eliminate the bswap.
2354     unsigned NLZ = DemandedBits.countl_zero();
2355     unsigned NTZ = DemandedBits.countr_zero();
2356 
2357     // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
2358     // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
2359     // have 14 leading zeros, round to 8.
2360     NLZ = alignDown(NLZ, 8);
2361     NTZ = alignDown(NTZ, 8);
2362     // If we need exactly one byte, we can do this transformation.
2363     if (BitWidth - NLZ - NTZ == 8) {
2364       // Replace this with either a left or right shift to get the byte into
2365       // the right place.
2366       unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2367       if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2368         unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2369         SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2370         SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2371         return TLO.CombineTo(Op, NewOp);
2372       }
2373     }
2374 
2375     APInt DemandedSrcBits = DemandedBits.byteSwap();
2376     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2377                              Depth + 1))
2378       return true;
2379     Known.One = Known2.One.byteSwap();
2380     Known.Zero = Known2.Zero.byteSwap();
2381     break;
2382   }
2383   case ISD::CTPOP: {
2384     // If only 1 bit is demanded, replace with PARITY as long as we're before
2385     // op legalization.
2386     // FIXME: Limit to scalars for now.
2387     if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2388       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2389                                                Op.getOperand(0)));
2390 
2391     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2392     break;
2393   }
2394   case ISD::SIGN_EXTEND_INREG: {
2395     SDValue Op0 = Op.getOperand(0);
2396     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2397     unsigned ExVTBits = ExVT.getScalarSizeInBits();
2398 
2399     // If we only care about the highest bit, don't bother shifting right.
2400     if (DemandedBits.isSignMask()) {
2401       unsigned MinSignedBits =
2402           TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2403       bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2404       // However if the input is already sign extended we expect the sign
2405       // extension to be dropped altogether later and do not simplify.
2406       if (!AlreadySignExtended) {
2407         // Compute the correct shift amount type, which must be getShiftAmountTy
2408         // for scalar types after legalization.
2409         SDValue ShiftAmt =
2410             TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2411         return TLO.CombineTo(Op,
2412                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2413       }
2414     }
2415 
2416     // If none of the extended bits are demanded, eliminate the sextinreg.
2417     if (DemandedBits.getActiveBits() <= ExVTBits)
2418       return TLO.CombineTo(Op, Op0);
2419 
2420     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2421 
2422     // Since the sign extended bits are demanded, we know that the sign
2423     // bit is demanded.
2424     InputDemandedBits.setBit(ExVTBits - 1);
2425 
2426     if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2427                              Depth + 1))
2428       return true;
2429 
2430     // If the sign bit of the input is known set or clear, then we know the
2431     // top bits of the result.
2432 
2433     // If the input sign bit is known zero, convert this into a zero extension.
2434     if (Known.Zero[ExVTBits - 1])
2435       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2436 
2437     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2438     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2439       Known.One.setBitsFrom(ExVTBits);
2440       Known.Zero &= Mask;
2441     } else { // Input sign bit unknown
2442       Known.Zero &= Mask;
2443       Known.One &= Mask;
2444     }
2445     break;
2446   }
2447   case ISD::BUILD_PAIR: {
2448     EVT HalfVT = Op.getOperand(0).getValueType();
2449     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2450 
2451     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2452     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2453 
2454     KnownBits KnownLo, KnownHi;
2455 
2456     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2457       return true;
2458 
2459     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2460       return true;
2461 
2462     Known = KnownHi.concat(KnownLo);
2463     break;
2464   }
2465   case ISD::ZERO_EXTEND_VECTOR_INREG:
2466     if (VT.isScalableVector())
2467       return false;
2468     [[fallthrough]];
2469   case ISD::ZERO_EXTEND: {
2470     SDValue Src = Op.getOperand(0);
2471     EVT SrcVT = Src.getValueType();
2472     unsigned InBits = SrcVT.getScalarSizeInBits();
2473     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2474     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2475 
2476     // If none of the top bits are demanded, convert this into an any_extend.
2477     if (DemandedBits.getActiveBits() <= InBits) {
2478       // If we only need the non-extended bits of the bottom element
2479       // then we can just bitcast to the result.
2480       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2481           VT.getSizeInBits() == SrcVT.getSizeInBits())
2482         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2483 
2484       unsigned Opc =
2485           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2486       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2487         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2488     }
2489 
2490     APInt InDemandedBits = DemandedBits.trunc(InBits);
2491     APInt InDemandedElts = DemandedElts.zext(InElts);
2492     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2493                              Depth + 1)) {
2494       Op->dropFlags(SDNodeFlags::NonNeg);
2495       return true;
2496     }
2497     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2498     Known = Known.zext(BitWidth);
2499 
2500     // Attempt to avoid multi-use ops if we don't need anything from them.
2501     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2502             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2503       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2504     break;
2505   }
2506   case ISD::SIGN_EXTEND_VECTOR_INREG:
2507     if (VT.isScalableVector())
2508       return false;
2509     [[fallthrough]];
2510   case ISD::SIGN_EXTEND: {
2511     SDValue Src = Op.getOperand(0);
2512     EVT SrcVT = Src.getValueType();
2513     unsigned InBits = SrcVT.getScalarSizeInBits();
2514     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2515     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2516 
2517     APInt InDemandedElts = DemandedElts.zext(InElts);
2518     APInt InDemandedBits = DemandedBits.trunc(InBits);
2519 
2520     // Since some of the sign extended bits are demanded, we know that the sign
2521     // bit is demanded.
2522     InDemandedBits.setBit(InBits - 1);
2523 
2524     // If none of the top bits are demanded, convert this into an any_extend.
2525     if (DemandedBits.getActiveBits() <= InBits) {
2526       // If we only need the non-extended bits of the bottom element
2527       // then we can just bitcast to the result.
2528       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2529           VT.getSizeInBits() == SrcVT.getSizeInBits())
2530         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2531 
2532       // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2533       if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2534           TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2535               InBits) {
2536         unsigned Opc =
2537             IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2538         if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2539           return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2540       }
2541     }
2542 
2543     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2544                              Depth + 1))
2545       return true;
2546     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2547 
2548     // If the sign bit is known one, the top bits match.
2549     Known = Known.sext(BitWidth);
2550 
2551     // If the sign bit is known zero, convert this to a zero extend.
2552     if (Known.isNonNegative()) {
2553       unsigned Opc =
2554           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2555       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2556         SDNodeFlags Flags;
2557         if (!IsVecInReg)
2558           Flags |= SDNodeFlags::NonNeg;
2559         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2560       }
2561     }
2562 
2563     // Attempt to avoid multi-use ops if we don't need anything from them.
2564     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2565             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2566       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2567     break;
2568   }
2569   case ISD::ANY_EXTEND_VECTOR_INREG:
2570     if (VT.isScalableVector())
2571       return false;
2572     [[fallthrough]];
2573   case ISD::ANY_EXTEND: {
2574     SDValue Src = Op.getOperand(0);
2575     EVT SrcVT = Src.getValueType();
2576     unsigned InBits = SrcVT.getScalarSizeInBits();
2577     unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2578     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2579 
2580     // If we only need the bottom element then we can just bitcast.
2581     // TODO: Handle ANY_EXTEND?
2582     if (IsLE && IsVecInReg && DemandedElts == 1 &&
2583         VT.getSizeInBits() == SrcVT.getSizeInBits())
2584       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2585 
2586     APInt InDemandedBits = DemandedBits.trunc(InBits);
2587     APInt InDemandedElts = DemandedElts.zext(InElts);
2588     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2589                              Depth + 1))
2590       return true;
2591     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2592     Known = Known.anyext(BitWidth);
2593 
2594     // Attempt to avoid multi-use ops if we don't need anything from them.
2595     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2596             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2597       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2598     break;
2599   }
2600   case ISD::TRUNCATE: {
2601     SDValue Src = Op.getOperand(0);
2602 
2603     // Simplify the input, using demanded bit information, and compute the known
2604     // zero/one bits live out.
2605     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2606     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2607     if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2608                              Depth + 1)) {
2609       // Disable the nsw and nuw flags. We can no longer guarantee that we
2610       // won't wrap after simplification.
2611       Op->dropFlags(SDNodeFlags::NoWrap);
2612       return true;
2613     }
2614     Known = Known.trunc(BitWidth);
2615 
2616     // Attempt to avoid multi-use ops if we don't need anything from them.
2617     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2618             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2619       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2620 
2621     // If the input is only used by this truncate, see if we can shrink it based
2622     // on the known demanded bits.
2623     switch (Src.getOpcode()) {
2624     default:
2625       break;
2626     case ISD::SRL:
2627       // Shrink SRL by a constant if none of the high bits shifted in are
2628       // demanded.
2629       if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2630         // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2631         // undesirable.
2632         break;
2633 
2634       if (Src.getNode()->hasOneUse()) {
2635         if (isTruncateFree(Src, VT) &&
2636             !isTruncateFree(Src.getValueType(), VT)) {
2637           // If the truncate is only free at trunc(srl), do not turn it into
2638           // srl(trunc). The check first verifies that the truncate is free
2639           // at Src's opcode (srl), then that the truncate is not done by
2640           // referencing a sub-register. In testing, if both trunc(srl) and
2641           // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2642           // trunc(srl)'s trunc is free, trunc(srl) is better.
2643           break;
2644         }
2645 
2646         std::optional<uint64_t> ShAmtC =
2647             TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2648         if (!ShAmtC || *ShAmtC >= BitWidth)
2649           break;
2650         uint64_t ShVal = *ShAmtC;
2651 
2652         APInt HighBits =
2653             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2654         HighBits.lshrInPlace(ShVal);
2655         HighBits = HighBits.trunc(BitWidth);
2656         if (!(HighBits & DemandedBits)) {
2657           // None of the shifted in bits are needed.  Add a truncate of the
2658           // shift input, then shift it.
2659           SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2660           SDValue NewTrunc =
2661               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2662           return TLO.CombineTo(
2663               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2664         }
2665       }
2666       break;
2667     }
2668 
2669     break;
2670   }
2671   case ISD::AssertZext: {
2672     // AssertZext demands all of the high bits, plus any of the low bits
2673     // demanded by its users.
2674     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2675     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2676     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2677                              TLO, Depth + 1))
2678       return true;
2679 
2680     Known.Zero |= ~InMask;
2681     Known.One &= (~Known.Zero);
2682     break;
2683   }
2684   case ISD::EXTRACT_VECTOR_ELT: {
2685     SDValue Src = Op.getOperand(0);
2686     SDValue Idx = Op.getOperand(1);
2687     ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2688     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2689 
2690     if (SrcEltCnt.isScalable())
2691       return false;
2692 
2693     // Demand the bits from every vector element without a constant index.
2694     unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2695     APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2696     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2697       if (CIdx->getAPIntValue().ult(NumSrcElts))
2698         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2699 
2700     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2701     // anything about the extended bits.
2702     APInt DemandedSrcBits = DemandedBits;
2703     if (BitWidth > EltBitWidth)
2704       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2705 
2706     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2707                              Depth + 1))
2708       return true;
2709 
2710     // Attempt to avoid multi-use ops if we don't need anything from them.
2711     if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2712       if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2713               Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2714         SDValue NewOp =
2715             TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2716         return TLO.CombineTo(Op, NewOp);
2717       }
2718     }
2719 
2720     Known = Known2;
2721     if (BitWidth > EltBitWidth)
2722       Known = Known.anyext(BitWidth);
2723     break;
2724   }
2725   case ISD::BITCAST: {
2726     if (VT.isScalableVector())
2727       return false;
2728     SDValue Src = Op.getOperand(0);
2729     EVT SrcVT = Src.getValueType();
2730     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2731 
2732     // If this is an FP->Int bitcast and if the sign bit is the only
2733     // thing demanded, turn this into a FGETSIGN.
2734     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2735         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2736         SrcVT.isFloatingPoint()) {
2737       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2738       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2739       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2740           SrcVT != MVT::f128) {
2741         // Cannot eliminate/lower SHL for f128 yet.
2742         EVT Ty = OpVTLegal ? VT : MVT::i32;
2743         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2744         // place.  We expect the SHL to be eliminated by other optimizations.
2745         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2746         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2747         if (!OpVTLegal && OpVTSizeInBits > 32)
2748           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2749         unsigned ShVal = Op.getValueSizeInBits() - 1;
2750         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2751         return TLO.CombineTo(Op,
2752                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2753       }
2754     }
2755 
2756     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2757     // Demand the elt/bit if any of the original elts/bits are demanded.
2758     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2759       unsigned Scale = BitWidth / NumSrcEltBits;
2760       unsigned NumSrcElts = SrcVT.getVectorNumElements();
2761       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2762       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2763       for (unsigned i = 0; i != Scale; ++i) {
2764         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2765         unsigned BitOffset = EltOffset * NumSrcEltBits;
2766         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2767         if (!Sub.isZero()) {
2768           DemandedSrcBits |= Sub;
2769           for (unsigned j = 0; j != NumElts; ++j)
2770             if (DemandedElts[j])
2771               DemandedSrcElts.setBit((j * Scale) + i);
2772         }
2773       }
2774 
2775       APInt KnownSrcUndef, KnownSrcZero;
2776       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2777                                      KnownSrcZero, TLO, Depth + 1))
2778         return true;
2779 
2780       KnownBits KnownSrcBits;
2781       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2782                                KnownSrcBits, TLO, Depth + 1))
2783         return true;
2784     } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2785       // TODO - bigendian once we have test coverage.
2786       unsigned Scale = NumSrcEltBits / BitWidth;
2787       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2788       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2789       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2790       for (unsigned i = 0; i != NumElts; ++i)
2791         if (DemandedElts[i]) {
2792           unsigned Offset = (i % Scale) * BitWidth;
2793           DemandedSrcBits.insertBits(DemandedBits, Offset);
2794           DemandedSrcElts.setBit(i / Scale);
2795         }
2796 
2797       if (SrcVT.isVector()) {
2798         APInt KnownSrcUndef, KnownSrcZero;
2799         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2800                                        KnownSrcZero, TLO, Depth + 1))
2801           return true;
2802       }
2803 
2804       KnownBits KnownSrcBits;
2805       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2806                                KnownSrcBits, TLO, Depth + 1))
2807         return true;
2808 
2809       // Attempt to avoid multi-use ops if we don't need anything from them.
2810       if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2811         if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2812                 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2813           SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2814           return TLO.CombineTo(Op, NewOp);
2815         }
2816       }
2817     }
2818 
2819     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2820     // recursive call where Known may be useful to the caller.
2821     if (Depth > 0) {
2822       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2823       return false;
2824     }
2825     break;
2826   }
2827   case ISD::MUL:
2828     if (DemandedBits.isPowerOf2()) {
2829       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2830       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2831       // odd (has LSB set), then the left-shifted low bit of X is the answer.
2832       unsigned CTZ = DemandedBits.countr_zero();
2833       ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2834       if (C && C->getAPIntValue().countr_zero() == CTZ) {
2835         SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2836         SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2837         return TLO.CombineTo(Op, Shl);
2838       }
2839     }
2840     // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2841     // X * X is odd iff X is odd.
2842     // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2843     if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2844       SDValue One = TLO.DAG.getConstant(1, dl, VT);
2845       SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2846       return TLO.CombineTo(Op, And1);
2847     }
2848     [[fallthrough]];
2849   case ISD::ADD:
2850   case ISD::SUB: {
2851     // Add, Sub, and Mul don't demand any bits in positions beyond that
2852     // of the highest bit demanded of them.
2853     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2854     SDNodeFlags Flags = Op.getNode()->getFlags();
2855     unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2856     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2857     KnownBits KnownOp0, KnownOp1;
2858     auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2859                                       const KnownBits &KnownRHS) {
2860       if (Op.getOpcode() == ISD::MUL)
2861         Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2862       return Demanded;
2863     };
2864     if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2865                              Depth + 1) ||
2866         SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2867                              DemandedElts, KnownOp0, TLO, Depth + 1) ||
2868         // See if the operation should be performed at a smaller bit width.
2869         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2870       // Disable the nsw and nuw flags. We can no longer guarantee that we
2871       // won't wrap after simplification.
2872       Op->dropFlags(SDNodeFlags::NoWrap);
2873       return true;
2874     }
2875 
2876     // neg x with only low bit demanded is simply x.
2877     if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2878         isNullConstant(Op0))
2879       return TLO.CombineTo(Op, Op1);
2880 
2881     // Attempt to avoid multi-use ops if we don't need anything from them.
2882     if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2883       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2884           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2885       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2886           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2887       if (DemandedOp0 || DemandedOp1) {
2888         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2889         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2890         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2891                                         Flags & ~SDNodeFlags::NoWrap);
2892         return TLO.CombineTo(Op, NewOp);
2893       }
2894     }
2895 
2896     // If we have a constant operand, we may be able to turn it into -1 if we
2897     // do not demand the high bits. This can make the constant smaller to
2898     // encode, allow more general folding, or match specialized instruction
2899     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2900     // is probably not useful (and could be detrimental).
2901     ConstantSDNode *C = isConstOrConstSplat(Op1);
2902     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2903     if (C && !C->isAllOnes() && !C->isOne() &&
2904         (C->getAPIntValue() | HighMask).isAllOnes()) {
2905       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2906       // Disable the nsw and nuw flags. We can no longer guarantee that we
2907       // won't wrap after simplification.
2908       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2909                                       Flags & ~SDNodeFlags::NoWrap);
2910       return TLO.CombineTo(Op, NewOp);
2911     }
2912 
2913     // Match a multiply with a disguised negated-power-of-2 and convert to a
2914     // an equivalent shift-left amount.
2915     // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2916     auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2917       if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2918         return 0;
2919 
2920       // Don't touch opaque constants. Also, ignore zero and power-of-2
2921       // multiplies. Those will get folded later.
2922       ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2923       if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2924           !MulC->getAPIntValue().isPowerOf2()) {
2925         APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2926         if (UnmaskedC.isNegatedPowerOf2())
2927           return (-UnmaskedC).logBase2();
2928       }
2929       return 0;
2930     };
2931 
2932     auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2933                        unsigned ShlAmt) {
2934       SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2935       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2936       SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2937       return TLO.CombineTo(Op, Res);
2938     };
2939 
2940     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2941       if (Op.getOpcode() == ISD::ADD) {
2942         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2943         if (unsigned ShAmt = getShiftLeftAmt(Op0))
2944           return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2945         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2946         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2947           return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2948       }
2949       if (Op.getOpcode() == ISD::SUB) {
2950         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2951         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2952           return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2953       }
2954     }
2955 
2956     if (Op.getOpcode() == ISD::MUL) {
2957       Known = KnownBits::mul(KnownOp0, KnownOp1);
2958     } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2959       Known = KnownBits::computeForAddSub(
2960           Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2961           Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2962     }
2963     break;
2964   }
2965   default:
2966     // We also ask the target about intrinsics (which could be specific to it).
2967     if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2968         Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2969       // TODO: Probably okay to remove after audit; here to reduce change size
2970       // in initial enablement patch for scalable vectors
2971       if (Op.getValueType().isScalableVector())
2972         break;
2973       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2974                                             Known, TLO, Depth))
2975         return true;
2976       break;
2977     }
2978 
2979     // Just use computeKnownBits to compute output bits.
2980     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2981     break;
2982   }
2983 
2984   // If we know the value of all of the demanded bits, return this as a
2985   // constant.
2986   if (!isTargetCanonicalConstantNode(Op) &&
2987       DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2988     // Avoid folding to a constant if any OpaqueConstant is involved.
2989     if (llvm::any_of(Op->ops(), [](SDValue V) {
2990           auto *C = dyn_cast<ConstantSDNode>(V);
2991           return C && C->isOpaque();
2992         }))
2993       return false;
2994     if (VT.isInteger())
2995       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2996     if (VT.isFloatingPoint())
2997       return TLO.CombineTo(
2998           Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2999                                     dl, VT));
3000   }
3001 
3002   // A multi use 'all demanded elts' simplify failed to find any knownbits.
3003   // Try again just for the original demanded elts.
3004   // Ensure we do this AFTER constant folding above.
3005   if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3006     Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3007 
3008   return false;
3009 }
3010 
3011 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3012                                                 const APInt &DemandedElts,
3013                                                 DAGCombinerInfo &DCI) const {
3014   SelectionDAG &DAG = DCI.DAG;
3015   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3016                         !DCI.isBeforeLegalizeOps());
3017 
3018   APInt KnownUndef, KnownZero;
3019   bool Simplified =
3020       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3021   if (Simplified) {
3022     DCI.AddToWorklist(Op.getNode());
3023     DCI.CommitTargetLoweringOpt(TLO);
3024   }
3025 
3026   return Simplified;
3027 }
3028 
3029 /// Given a vector binary operation and known undefined elements for each input
3030 /// operand, compute whether each element of the output is undefined.
3031 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3032                                          const APInt &UndefOp0,
3033                                          const APInt &UndefOp1) {
3034   EVT VT = BO.getValueType();
3035   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3036          "Vector binop only");
3037 
3038   EVT EltVT = VT.getVectorElementType();
3039   unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3040   assert(UndefOp0.getBitWidth() == NumElts &&
3041          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3042 
3043   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3044                                    const APInt &UndefVals) {
3045     if (UndefVals[Index])
3046       return DAG.getUNDEF(EltVT);
3047 
3048     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3049       // Try hard to make sure that the getNode() call is not creating temporary
3050       // nodes. Ignore opaque integers because they do not constant fold.
3051       SDValue Elt = BV->getOperand(Index);
3052       auto *C = dyn_cast<ConstantSDNode>(Elt);
3053       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3054         return Elt;
3055     }
3056 
3057     return SDValue();
3058   };
3059 
3060   APInt KnownUndef = APInt::getZero(NumElts);
3061   for (unsigned i = 0; i != NumElts; ++i) {
3062     // If both inputs for this element are either constant or undef and match
3063     // the element type, compute the constant/undef result for this element of
3064     // the vector.
3065     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3066     // not handle FP constants. The code within getNode() should be refactored
3067     // to avoid the danger of creating a bogus temporary node here.
3068     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3069     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3070     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3071       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3072         KnownUndef.setBit(i);
3073   }
3074   return KnownUndef;
3075 }
3076 
3077 bool TargetLowering::SimplifyDemandedVectorElts(
3078     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3079     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3080     bool AssumeSingleUse) const {
3081   EVT VT = Op.getValueType();
3082   unsigned Opcode = Op.getOpcode();
3083   APInt DemandedElts = OriginalDemandedElts;
3084   unsigned NumElts = DemandedElts.getBitWidth();
3085   assert(VT.isVector() && "Expected vector op");
3086 
3087   KnownUndef = KnownZero = APInt::getZero(NumElts);
3088 
3089   if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3090     return false;
3091 
3092   // TODO: For now we assume we know nothing about scalable vectors.
3093   if (VT.isScalableVector())
3094     return false;
3095 
3096   assert(VT.getVectorNumElements() == NumElts &&
3097          "Mask size mismatches value type element count!");
3098 
3099   // Undef operand.
3100   if (Op.isUndef()) {
3101     KnownUndef.setAllBits();
3102     return false;
3103   }
3104 
3105   // If Op has other users, assume that all elements are needed.
3106   if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3107     DemandedElts.setAllBits();
3108 
3109   // Not demanding any elements from Op.
3110   if (DemandedElts == 0) {
3111     KnownUndef.setAllBits();
3112     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3113   }
3114 
3115   // Limit search depth.
3116   if (Depth >= SelectionDAG::MaxRecursionDepth)
3117     return false;
3118 
3119   SDLoc DL(Op);
3120   unsigned EltSizeInBits = VT.getScalarSizeInBits();
3121   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3122 
3123   // Helper for demanding the specified elements and all the bits of both binary
3124   // operands.
3125   auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3126     SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3127                                                            TLO.DAG, Depth + 1);
3128     SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3129                                                            TLO.DAG, Depth + 1);
3130     if (NewOp0 || NewOp1) {
3131       SDValue NewOp =
3132           TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3133                           NewOp1 ? NewOp1 : Op1, Op->getFlags());
3134       return TLO.CombineTo(Op, NewOp);
3135     }
3136     return false;
3137   };
3138 
3139   switch (Opcode) {
3140   case ISD::SCALAR_TO_VECTOR: {
3141     if (!DemandedElts[0]) {
3142       KnownUndef.setAllBits();
3143       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3144     }
3145     SDValue ScalarSrc = Op.getOperand(0);
3146     if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3147       SDValue Src = ScalarSrc.getOperand(0);
3148       SDValue Idx = ScalarSrc.getOperand(1);
3149       EVT SrcVT = Src.getValueType();
3150 
3151       ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3152 
3153       if (SrcEltCnt.isScalable())
3154         return false;
3155 
3156       unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3157       if (isNullConstant(Idx)) {
3158         APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3159         APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3160         APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3161         if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3162                                        TLO, Depth + 1))
3163           return true;
3164       }
3165     }
3166     KnownUndef.setHighBits(NumElts - 1);
3167     break;
3168   }
3169   case ISD::BITCAST: {
3170     SDValue Src = Op.getOperand(0);
3171     EVT SrcVT = Src.getValueType();
3172 
3173     // We only handle vectors here.
3174     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3175     if (!SrcVT.isVector())
3176       break;
3177 
3178     // Fast handling of 'identity' bitcasts.
3179     unsigned NumSrcElts = SrcVT.getVectorNumElements();
3180     if (NumSrcElts == NumElts)
3181       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3182                                         KnownZero, TLO, Depth + 1);
3183 
3184     APInt SrcDemandedElts, SrcZero, SrcUndef;
3185 
3186     // Bitcast from 'large element' src vector to 'small element' vector, we
3187     // must demand a source element if any DemandedElt maps to it.
3188     if ((NumElts % NumSrcElts) == 0) {
3189       unsigned Scale = NumElts / NumSrcElts;
3190       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3191       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3192                                      TLO, Depth + 1))
3193         return true;
3194 
3195       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3196       // of the large element.
3197       // TODO - bigendian once we have test coverage.
3198       if (IsLE) {
3199         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3200         APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3201         for (unsigned i = 0; i != NumElts; ++i)
3202           if (DemandedElts[i]) {
3203             unsigned Ofs = (i % Scale) * EltSizeInBits;
3204             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3205           }
3206 
3207         KnownBits Known;
3208         if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3209                                  TLO, Depth + 1))
3210           return true;
3211 
3212         // The bitcast has split each wide element into a number of
3213         // narrow subelements. We have just computed the Known bits
3214         // for wide elements. See if element splitting results in
3215         // some subelements being zero. Only for demanded elements!
3216         for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3217           if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3218                    .isAllOnes())
3219             continue;
3220           for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3221             unsigned Elt = Scale * SrcElt + SubElt;
3222             if (DemandedElts[Elt])
3223               KnownZero.setBit(Elt);
3224           }
3225         }
3226       }
3227 
3228       // If the src element is zero/undef then all the output elements will be -
3229       // only demanded elements are guaranteed to be correct.
3230       for (unsigned i = 0; i != NumSrcElts; ++i) {
3231         if (SrcDemandedElts[i]) {
3232           if (SrcZero[i])
3233             KnownZero.setBits(i * Scale, (i + 1) * Scale);
3234           if (SrcUndef[i])
3235             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3236         }
3237       }
3238     }
3239 
3240     // Bitcast from 'small element' src vector to 'large element' vector, we
3241     // demand all smaller source elements covered by the larger demanded element
3242     // of this vector.
3243     if ((NumSrcElts % NumElts) == 0) {
3244       unsigned Scale = NumSrcElts / NumElts;
3245       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3246       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3247                                      TLO, Depth + 1))
3248         return true;
3249 
3250       // If all the src elements covering an output element are zero/undef, then
3251       // the output element will be as well, assuming it was demanded.
3252       for (unsigned i = 0; i != NumElts; ++i) {
3253         if (DemandedElts[i]) {
3254           if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3255             KnownZero.setBit(i);
3256           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3257             KnownUndef.setBit(i);
3258         }
3259       }
3260     }
3261     break;
3262   }
3263   case ISD::FREEZE: {
3264     SDValue N0 = Op.getOperand(0);
3265     if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3266                                                  /*PoisonOnly=*/false))
3267       return TLO.CombineTo(Op, N0);
3268 
3269     // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3270     // freeze(op(x, ...)) -> op(freeze(x), ...).
3271     if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3272       return TLO.CombineTo(
3273           Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3274                               TLO.DAG.getFreeze(N0.getOperand(0))));
3275     break;
3276   }
3277   case ISD::BUILD_VECTOR: {
3278     // Check all elements and simplify any unused elements with UNDEF.
3279     if (!DemandedElts.isAllOnes()) {
3280       // Don't simplify BROADCASTS.
3281       if (llvm::any_of(Op->op_values(),
3282                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3283         SmallVector<SDValue, 32> Ops(Op->ops());
3284         bool Updated = false;
3285         for (unsigned i = 0; i != NumElts; ++i) {
3286           if (!DemandedElts[i] && !Ops[i].isUndef()) {
3287             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3288             KnownUndef.setBit(i);
3289             Updated = true;
3290           }
3291         }
3292         if (Updated)
3293           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3294       }
3295     }
3296     for (unsigned i = 0; i != NumElts; ++i) {
3297       SDValue SrcOp = Op.getOperand(i);
3298       if (SrcOp.isUndef()) {
3299         KnownUndef.setBit(i);
3300       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3301                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3302         KnownZero.setBit(i);
3303       }
3304     }
3305     break;
3306   }
3307   case ISD::CONCAT_VECTORS: {
3308     EVT SubVT = Op.getOperand(0).getValueType();
3309     unsigned NumSubVecs = Op.getNumOperands();
3310     unsigned NumSubElts = SubVT.getVectorNumElements();
3311     for (unsigned i = 0; i != NumSubVecs; ++i) {
3312       SDValue SubOp = Op.getOperand(i);
3313       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3314       APInt SubUndef, SubZero;
3315       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3316                                      Depth + 1))
3317         return true;
3318       KnownUndef.insertBits(SubUndef, i * NumSubElts);
3319       KnownZero.insertBits(SubZero, i * NumSubElts);
3320     }
3321 
3322     // Attempt to avoid multi-use ops if we don't need anything from them.
3323     if (!DemandedElts.isAllOnes()) {
3324       bool FoundNewSub = false;
3325       SmallVector<SDValue, 2> DemandedSubOps;
3326       for (unsigned i = 0; i != NumSubVecs; ++i) {
3327         SDValue SubOp = Op.getOperand(i);
3328         APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3329         SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3330             SubOp, SubElts, TLO.DAG, Depth + 1);
3331         DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3332         FoundNewSub = NewSubOp ? true : FoundNewSub;
3333       }
3334       if (FoundNewSub) {
3335         SDValue NewOp =
3336             TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3337         return TLO.CombineTo(Op, NewOp);
3338       }
3339     }
3340     break;
3341   }
3342   case ISD::INSERT_SUBVECTOR: {
3343     // Demand any elements from the subvector and the remainder from the src its
3344     // inserted into.
3345     SDValue Src = Op.getOperand(0);
3346     SDValue Sub = Op.getOperand(1);
3347     uint64_t Idx = Op.getConstantOperandVal(2);
3348     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3349     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3350     APInt DemandedSrcElts = DemandedElts;
3351     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3352 
3353     APInt SubUndef, SubZero;
3354     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3355                                    Depth + 1))
3356       return true;
3357 
3358     // If none of the src operand elements are demanded, replace it with undef.
3359     if (!DemandedSrcElts && !Src.isUndef())
3360       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3361                                                TLO.DAG.getUNDEF(VT), Sub,
3362                                                Op.getOperand(2)));
3363 
3364     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3365                                    TLO, Depth + 1))
3366       return true;
3367     KnownUndef.insertBits(SubUndef, Idx);
3368     KnownZero.insertBits(SubZero, Idx);
3369 
3370     // Attempt to avoid multi-use ops if we don't need anything from them.
3371     if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3372       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3373           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3374       SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3375           Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3376       if (NewSrc || NewSub) {
3377         NewSrc = NewSrc ? NewSrc : Src;
3378         NewSub = NewSub ? NewSub : Sub;
3379         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3380                                         NewSub, Op.getOperand(2));
3381         return TLO.CombineTo(Op, NewOp);
3382       }
3383     }
3384     break;
3385   }
3386   case ISD::EXTRACT_SUBVECTOR: {
3387     // Offset the demanded elts by the subvector index.
3388     SDValue Src = Op.getOperand(0);
3389     if (Src.getValueType().isScalableVector())
3390       break;
3391     uint64_t Idx = Op.getConstantOperandVal(1);
3392     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3393     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3394 
3395     APInt SrcUndef, SrcZero;
3396     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3397                                    Depth + 1))
3398       return true;
3399     KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3400     KnownZero = SrcZero.extractBits(NumElts, Idx);
3401 
3402     // Attempt to avoid multi-use ops if we don't need anything from them.
3403     if (!DemandedElts.isAllOnes()) {
3404       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3405           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3406       if (NewSrc) {
3407         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3408                                         Op.getOperand(1));
3409         return TLO.CombineTo(Op, NewOp);
3410       }
3411     }
3412     break;
3413   }
3414   case ISD::INSERT_VECTOR_ELT: {
3415     SDValue Vec = Op.getOperand(0);
3416     SDValue Scl = Op.getOperand(1);
3417     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3418 
3419     // For a legal, constant insertion index, if we don't need this insertion
3420     // then strip it, else remove it from the demanded elts.
3421     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3422       unsigned Idx = CIdx->getZExtValue();
3423       if (!DemandedElts[Idx])
3424         return TLO.CombineTo(Op, Vec);
3425 
3426       APInt DemandedVecElts(DemandedElts);
3427       DemandedVecElts.clearBit(Idx);
3428       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3429                                      KnownZero, TLO, Depth + 1))
3430         return true;
3431 
3432       KnownUndef.setBitVal(Idx, Scl.isUndef());
3433 
3434       KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3435       break;
3436     }
3437 
3438     APInt VecUndef, VecZero;
3439     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3440                                    Depth + 1))
3441       return true;
3442     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3443     break;
3444   }
3445   case ISD::VSELECT: {
3446     SDValue Sel = Op.getOperand(0);
3447     SDValue LHS = Op.getOperand(1);
3448     SDValue RHS = Op.getOperand(2);
3449 
3450     // Try to transform the select condition based on the current demanded
3451     // elements.
3452     APInt UndefSel, ZeroSel;
3453     if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3454                                    Depth + 1))
3455       return true;
3456 
3457     // See if we can simplify either vselect operand.
3458     APInt DemandedLHS(DemandedElts);
3459     APInt DemandedRHS(DemandedElts);
3460     APInt UndefLHS, ZeroLHS;
3461     APInt UndefRHS, ZeroRHS;
3462     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3463                                    Depth + 1))
3464       return true;
3465     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3466                                    Depth + 1))
3467       return true;
3468 
3469     KnownUndef = UndefLHS & UndefRHS;
3470     KnownZero = ZeroLHS & ZeroRHS;
3471 
3472     // If we know that the selected element is always zero, we don't need the
3473     // select value element.
3474     APInt DemandedSel = DemandedElts & ~KnownZero;
3475     if (DemandedSel != DemandedElts)
3476       if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3477                                      Depth + 1))
3478         return true;
3479 
3480     break;
3481   }
3482   case ISD::VECTOR_SHUFFLE: {
3483     SDValue LHS = Op.getOperand(0);
3484     SDValue RHS = Op.getOperand(1);
3485     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3486 
3487     // Collect demanded elements from shuffle operands..
3488     APInt DemandedLHS(NumElts, 0);
3489     APInt DemandedRHS(NumElts, 0);
3490     for (unsigned i = 0; i != NumElts; ++i) {
3491       int M = ShuffleMask[i];
3492       if (M < 0 || !DemandedElts[i])
3493         continue;
3494       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3495       if (M < (int)NumElts)
3496         DemandedLHS.setBit(M);
3497       else
3498         DemandedRHS.setBit(M - NumElts);
3499     }
3500 
3501     // See if we can simplify either shuffle operand.
3502     APInt UndefLHS, ZeroLHS;
3503     APInt UndefRHS, ZeroRHS;
3504     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3505                                    Depth + 1))
3506       return true;
3507     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3508                                    Depth + 1))
3509       return true;
3510 
3511     // Simplify mask using undef elements from LHS/RHS.
3512     bool Updated = false;
3513     bool IdentityLHS = true, IdentityRHS = true;
3514     SmallVector<int, 32> NewMask(ShuffleMask);
3515     for (unsigned i = 0; i != NumElts; ++i) {
3516       int &M = NewMask[i];
3517       if (M < 0)
3518         continue;
3519       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3520           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3521         Updated = true;
3522         M = -1;
3523       }
3524       IdentityLHS &= (M < 0) || (M == (int)i);
3525       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3526     }
3527 
3528     // Update legal shuffle masks based on demanded elements if it won't reduce
3529     // to Identity which can cause premature removal of the shuffle mask.
3530     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3531       SDValue LegalShuffle =
3532           buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3533       if (LegalShuffle)
3534         return TLO.CombineTo(Op, LegalShuffle);
3535     }
3536 
3537     // Propagate undef/zero elements from LHS/RHS.
3538     for (unsigned i = 0; i != NumElts; ++i) {
3539       int M = ShuffleMask[i];
3540       if (M < 0) {
3541         KnownUndef.setBit(i);
3542       } else if (M < (int)NumElts) {
3543         if (UndefLHS[M])
3544           KnownUndef.setBit(i);
3545         if (ZeroLHS[M])
3546           KnownZero.setBit(i);
3547       } else {
3548         if (UndefRHS[M - NumElts])
3549           KnownUndef.setBit(i);
3550         if (ZeroRHS[M - NumElts])
3551           KnownZero.setBit(i);
3552       }
3553     }
3554     break;
3555   }
3556   case ISD::ANY_EXTEND_VECTOR_INREG:
3557   case ISD::SIGN_EXTEND_VECTOR_INREG:
3558   case ISD::ZERO_EXTEND_VECTOR_INREG: {
3559     APInt SrcUndef, SrcZero;
3560     SDValue Src = Op.getOperand(0);
3561     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3562     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3563     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3564                                    Depth + 1))
3565       return true;
3566     KnownZero = SrcZero.zextOrTrunc(NumElts);
3567     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3568 
3569     if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3570         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3571         DemandedSrcElts == 1) {
3572       // aext - if we just need the bottom element then we can bitcast.
3573       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3574     }
3575 
3576     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3577       // zext(undef) upper bits are guaranteed to be zero.
3578       if (DemandedElts.isSubsetOf(KnownUndef))
3579         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3580       KnownUndef.clearAllBits();
3581 
3582       // zext - if we just need the bottom element then we can mask:
3583       // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3584       if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3585           Op->isOnlyUserOf(Src.getNode()) &&
3586           Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3587         SDLoc DL(Op);
3588         EVT SrcVT = Src.getValueType();
3589         EVT SrcSVT = SrcVT.getScalarType();
3590         SmallVector<SDValue> MaskElts;
3591         MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3592         MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3593         SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3594         if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3595                 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3596           Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3597           return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3598         }
3599       }
3600     }
3601     break;
3602   }
3603 
3604   // TODO: There are more binop opcodes that could be handled here - MIN,
3605   // MAX, saturated math, etc.
3606   case ISD::ADD: {
3607     SDValue Op0 = Op.getOperand(0);
3608     SDValue Op1 = Op.getOperand(1);
3609     if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3610       APInt UndefLHS, ZeroLHS;
3611       if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3612                                      Depth + 1, /*AssumeSingleUse*/ true))
3613         return true;
3614     }
3615     [[fallthrough]];
3616   }
3617   case ISD::AVGCEILS:
3618   case ISD::AVGCEILU:
3619   case ISD::AVGFLOORS:
3620   case ISD::AVGFLOORU:
3621   case ISD::OR:
3622   case ISD::XOR:
3623   case ISD::SUB:
3624   case ISD::FADD:
3625   case ISD::FSUB:
3626   case ISD::FMUL:
3627   case ISD::FDIV:
3628   case ISD::FREM: {
3629     SDValue Op0 = Op.getOperand(0);
3630     SDValue Op1 = Op.getOperand(1);
3631 
3632     APInt UndefRHS, ZeroRHS;
3633     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3634                                    Depth + 1))
3635       return true;
3636     APInt UndefLHS, ZeroLHS;
3637     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3638                                    Depth + 1))
3639       return true;
3640 
3641     KnownZero = ZeroLHS & ZeroRHS;
3642     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3643 
3644     // Attempt to avoid multi-use ops if we don't need anything from them.
3645     // TODO - use KnownUndef to relax the demandedelts?
3646     if (!DemandedElts.isAllOnes())
3647       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3648         return true;
3649     break;
3650   }
3651   case ISD::SHL:
3652   case ISD::SRL:
3653   case ISD::SRA:
3654   case ISD::ROTL:
3655   case ISD::ROTR: {
3656     SDValue Op0 = Op.getOperand(0);
3657     SDValue Op1 = Op.getOperand(1);
3658 
3659     APInt UndefRHS, ZeroRHS;
3660     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3661                                    Depth + 1))
3662       return true;
3663     APInt UndefLHS, ZeroLHS;
3664     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3665                                    Depth + 1))
3666       return true;
3667 
3668     KnownZero = ZeroLHS;
3669     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3670 
3671     // Attempt to avoid multi-use ops if we don't need anything from them.
3672     // TODO - use KnownUndef to relax the demandedelts?
3673     if (!DemandedElts.isAllOnes())
3674       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3675         return true;
3676     break;
3677   }
3678   case ISD::MUL:
3679   case ISD::MULHU:
3680   case ISD::MULHS:
3681   case ISD::AND: {
3682     SDValue Op0 = Op.getOperand(0);
3683     SDValue Op1 = Op.getOperand(1);
3684 
3685     APInt SrcUndef, SrcZero;
3686     if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3687                                    Depth + 1))
3688       return true;
3689     // If we know that a demanded element was zero in Op1 we don't need to
3690     // demand it in Op0 - its guaranteed to be zero.
3691     APInt DemandedElts0 = DemandedElts & ~SrcZero;
3692     if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3693                                    TLO, Depth + 1))
3694       return true;
3695 
3696     KnownUndef &= DemandedElts0;
3697     KnownZero &= DemandedElts0;
3698 
3699     // If every element pair has a zero/undef then just fold to zero.
3700     // fold (and x, undef) -> 0  /  (and x, 0) -> 0
3701     // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
3702     if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3703       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3704 
3705     // If either side has a zero element, then the result element is zero, even
3706     // if the other is an UNDEF.
3707     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3708     // and then handle 'and' nodes with the rest of the binop opcodes.
3709     KnownZero |= SrcZero;
3710     KnownUndef &= SrcUndef;
3711     KnownUndef &= ~KnownZero;
3712 
3713     // Attempt to avoid multi-use ops if we don't need anything from them.
3714     if (!DemandedElts.isAllOnes())
3715       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3716         return true;
3717     break;
3718   }
3719   case ISD::TRUNCATE:
3720   case ISD::SIGN_EXTEND:
3721   case ISD::ZERO_EXTEND:
3722     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3723                                    KnownZero, TLO, Depth + 1))
3724       return true;
3725 
3726     if (!DemandedElts.isAllOnes())
3727       if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3728               Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3729         return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3730 
3731     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3732       // zext(undef) upper bits are guaranteed to be zero.
3733       if (DemandedElts.isSubsetOf(KnownUndef))
3734         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3735       KnownUndef.clearAllBits();
3736     }
3737     break;
3738   case ISD::SINT_TO_FP:
3739   case ISD::UINT_TO_FP:
3740   case ISD::FP_TO_SINT:
3741   case ISD::FP_TO_UINT:
3742     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3743                                    KnownZero, TLO, Depth + 1))
3744       return true;
3745     // Don't fall through to generic undef -> undef handling.
3746     return false;
3747   default: {
3748     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3749       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3750                                                   KnownZero, TLO, Depth))
3751         return true;
3752     } else {
3753       KnownBits Known;
3754       APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3755       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3756                                TLO, Depth, AssumeSingleUse))
3757         return true;
3758     }
3759     break;
3760   }
3761   }
3762   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3763 
3764   // Constant fold all undef cases.
3765   // TODO: Handle zero cases as well.
3766   if (DemandedElts.isSubsetOf(KnownUndef))
3767     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3768 
3769   return false;
3770 }
3771 
/// Determine which of the bits specified in Mask are known to be either zero or
/// one and return them in the Known.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                   KnownBits &Known,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth) const {
  // Only target-specific nodes or intrinsics may reach this default
  // implementation; generic opcodes are the caller's responsibility.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  // Conservative default: nothing is known about any bit. Targets override
  // this hook to expose known-bits facts for their custom nodes.
  Known.resetAll();
}
3787 
/// GlobalISel analogue of computeKnownBitsForTargetNode.
/// Conservative default: report that no bits of register R are known;
/// targets override this hook for their target-specific instructions.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
3794 
3795 void TargetLowering::computeKnownBitsForFrameIndex(
3796   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3797   // The low bits are known zero if the pointer is aligned.
3798   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3799 }
3800 
/// Conservative default: no alignment facts are known for register R
/// (Align(1) means "no guaranteed alignment"); targets override this hook.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  return Align(1);
}
3806 
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                         const APInt &,
                                                         const SelectionDAG &,
                                                         unsigned Depth) const {
  // Only target-specific nodes or intrinsics may reach this default.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  // Conservative default: only the sign bit itself is known to be a sign bit.
  return 1;
}
3821 
/// GlobalISel analogue of ComputeNumSignBitsForTargetNode.
/// Conservative default: assume only one sign bit; targets override.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
3827 
/// Hook for targets to simplify a target-specific node given which vector
/// elements are actually demanded. Returns true if a simplification was made.
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  // Only target-specific nodes or intrinsics may reach this default.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
         " is a target node!");
  // Default: no simplification performed (KnownUndef/KnownZero left as-is).
  return false;
}
3839 
/// Hook for targets to simplify a target-specific node given which bits are
/// actually demanded. Returns true if a simplification was made.
bool TargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  // Only target-specific nodes or intrinsics may reach this default.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedBits if you don't know whether Op"
         " is a target node!");
  // No simplification by default, but still populate Known so callers get
  // whatever the target's known-bits hook can prove about Op.
  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
  return false;
}
3852 
/// Hook for targets to return a simpler replacement value for Op (without
/// modifying Op itself, so it is safe when Op has multiple uses), given the
/// demanded bits/elements. An empty SDValue means no replacement is known.
SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Only target-specific nodes or intrinsics may reach this default.
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
      " is a target node!");
  return SDValue();
}
3865 
3866 SDValue
3867 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3868                                         SDValue N1, MutableArrayRef<int> Mask,
3869                                         SelectionDAG &DAG) const {
3870   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3871   if (!LegalMask) {
3872     std::swap(N0, N1);
3873     ShuffleVectorSDNode::commuteMask(Mask);
3874     LegalMask = isShuffleMaskLegal(Mask, VT);
3875   }
3876 
3877   if (!LegalMask)
3878     return SDValue();
3879 
3880   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3881 }
3882 
/// Default: no IR Constant can be recovered from the given load node;
/// targets override this hook when they can do so.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3886 
3887 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3888     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3889     bool PoisonOnly, unsigned Depth) const {
3890   assert(
3891       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3892        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3893        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3894        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3895       "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3896       " is a target node!");
3897 
3898   // If Op can't create undef/poison and none of its operands are undef/poison
3899   // then Op is never undef/poison.
3900   return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3901                                               /*ConsiderFlags*/ true, Depth) &&
3902          all_of(Op->ops(), [&](SDValue V) {
3903            return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3904                                                        Depth + 1);
3905          });
3906 }
3907 
/// Hook for targets to state whether a target node can introduce undef or
/// poison that was not present in its operands.
bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
  // Only target-specific nodes or intrinsics may reach this default.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use canCreateUndefOrPoison if you don't know whether Op"
         " is a target node!");
  // Be conservative and return true.
  return true;
}
3920 
/// Hook for targets to prove a target node never produces NaN (or, when SNaN
/// is set, never produces a signaling NaN).
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
                                                  unsigned Depth) const {
  // Only target-specific nodes or intrinsics may reach this default.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isKnownNeverNaN if you don't know whether Op"
         " is a target node!");
  // Conservative default: cannot rule out NaN.
  return false;
}
3933 
/// Hook for targets to prove that a target node is a splat of a single value
/// across the demanded elements (reporting undef elements via UndefElts).
bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &UndefElts,
                                               const SelectionDAG &DAG,
                                               unsigned Depth) const {
  // Only target-specific nodes or intrinsics may reach this default.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isSplatValue if you don't know whether Op"
         " is a target node!");
  // Conservative default: cannot prove a splat.
  return false;
}
3947 
3948 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3949 // work with truncating build vectors and vectors with elements of less than
3950 // 8 bits.
3951 bool TargetLowering::isConstTrueVal(SDValue N) const {
3952   if (!N)
3953     return false;
3954 
3955   unsigned EltWidth;
3956   APInt CVal;
3957   if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3958                                                /*AllowTruncation=*/true)) {
3959     CVal = CN->getAPIntValue();
3960     EltWidth = N.getValueType().getScalarSizeInBits();
3961   } else
3962     return false;
3963 
3964   // If this is a truncating splat, truncate the splat value.
3965   // Otherwise, we may fail to match the expected values below.
3966   if (EltWidth < CVal.getBitWidth())
3967     CVal = CVal.trunc(EltWidth);
3968 
3969   switch (getBooleanContents(N.getValueType())) {
3970   case UndefinedBooleanContent:
3971     return CVal[0];
3972   case ZeroOrOneBooleanContent:
3973     return CVal.isOne();
3974   case ZeroOrNegativeOneBooleanContent:
3975     return CVal.isAllOnes();
3976   }
3977 
3978   llvm_unreachable("Invalid boolean contents");
3979 }
3980 
3981 bool TargetLowering::isConstFalseVal(SDValue N) const {
3982   if (!N)
3983     return false;
3984 
3985   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3986   if (!CN) {
3987     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3988     if (!BV)
3989       return false;
3990 
3991     // Only interested in constant splats, we don't care about undef
3992     // elements in identifying boolean constants and getConstantSplatNode
3993     // returns NULL if all ops are undef;
3994     CN = BV->getConstantSplatNode();
3995     if (!CN)
3996       return false;
3997   }
3998 
3999   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4000     return !CN->getAPIntValue()[0];
4001 
4002   return CN->isZero();
4003 }
4004 
4005 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4006                                        bool SExt) const {
4007   if (VT == MVT::i1)
4008     return N->isOne();
4009 
4010   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
4011   switch (Cnt) {
4012   case TargetLowering::ZeroOrOneBooleanContent:
4013     // An extended value of 1 is always true, unless its original type is i1,
4014     // in which case it will be sign extended to -1.
4015     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4016   case TargetLowering::UndefinedBooleanContent:
4017   case TargetLowering::ZeroOrNegativeOneBooleanContent:
4018     return N->isAllOnes() && SExt;
4019   }
4020   llvm_unreachable("Unexpected enumeration.");
4021 }
4022 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
/// Returns the folded setcc, or an empty SDValue if no fold applies. Only
/// integer SETEQ/SETNE comparisons are handled.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize: if only N1 is an 'and', move it into N0.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Bail out unless this is an integer equality compare against an 'and'.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    // getActiveBits() makes the mask's single set bit the sign bit of the
    // narrow type.
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // its liable to create and infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
4117 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned range check onto an equality check, normalizing the
  // inclusive forms (ule/ugt) to the exclusive ones (ult/uge) by bumping the
  // constant: (x u<= C) == (x u< C+1) and (x u> C) == (x u>= C+1).
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The add constant must be exactly half the setcc constant, i.e.
  // 1 << (KeptBits-1) versus 1 << KeptBits.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into:  sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      ISD::SIGN_EXTEND_INREG, DL, XVT, X,
      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
}
4209 
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'.
  // NOTE: X must be assigned by the caller before invoking Match (it is
  // captured by reference and read at the end of the lambda).
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    // Record the opposite shift direction: hoisting the constant out of the
    // shift means the other operand gets shifted the other way.
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    // Let the target decide whether the rewritten form is profitable.
    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
4278 
4279 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4280 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4281 /// handle the commuted versions of these patterns.
4282 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4283                                            ISD::CondCode Cond, const SDLoc &DL,
4284                                            DAGCombinerInfo &DCI) const {
4285   unsigned BOpcode = N0.getOpcode();
4286   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4287          "Unexpected binop");
4288   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4289 
4290   // (X + Y) == X --> Y == 0
4291   // (X - Y) == X --> Y == 0
4292   // (X ^ Y) == X --> Y == 0
4293   SelectionDAG &DAG = DCI.DAG;
4294   EVT OpVT = N0.getValueType();
4295   SDValue X = N0.getOperand(0);
4296   SDValue Y = N0.getOperand(1);
4297   if (X == N1)
4298     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4299 
4300   if (Y != N1)
4301     return SDValue();
4302 
4303   // (X + Y) == Y --> X == 0
4304   // (X ^ Y) == Y --> X == 0
4305   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4306     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4307 
4308   // The shift would not be valid if the operands are boolean (i1).
4309   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4310     return SDValue();
4311 
4312   // (X - Y) == Y --> X == Y << 1
4313   SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4314   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4315   if (!DCI.isCalledByLegalizer())
4316     DCI.AddToWorklist(YShl1.getNode());
4317   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4318 }
4319 
/// Try to simplify a setcc of a CTPOP node compared against a constant C1 into
/// cheaper bit tricks when the target reports that CTPOP is not fast for the
/// type. Returns an empty SDValue if no simplification applies.
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // The popcount of an N-bit value is at most N, which fits in Log2_32(N) + 1
  // bits, so a truncate to more than Log2_32(N) bits preserves the count.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
      return SDValue();

    // Each "pass" below emits one ADD + one AND, so bail out if the constant
    // would require more passes than the target's cost limit allows.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    // (ctpop x) u< C needs C-1 clear-lowest-bit passes; u> C needs C passes.
    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    // Each x & (x-1) step clears the lowest set bit, so after 'Passes' steps
    // the result is zero iff x had at most 'Passes' bits set.
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    // Add == x - 1 (adding all-ones is subtracting one).
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);

    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
    // check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    // The unsigned comparison also gives the right answer for x == 0
    // (x ^ x-1 == -1 is not u> -1), so no never-zero check is needed here.
    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
  }

  return SDValue();
}
4393 
4394 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4395                                    ISD::CondCode Cond, const SDLoc &dl,
4396                                    SelectionDAG &DAG) {
4397   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4398     return SDValue();
4399 
4400   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4401   if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4402     return SDValue();
4403 
4404   auto getRotateSource = [](SDValue X) {
4405     if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4406       return X.getOperand(0);
4407     return SDValue();
4408   };
4409 
4410   // Peek through a rotated value compared against 0 or -1:
4411   // (rot X, Y) == 0/-1 --> X == 0/-1
4412   // (rot X, Y) != 0/-1 --> X != 0/-1
4413   if (SDValue R = getRotateSource(N0))
4414     return DAG.getSetCC(dl, VT, R, N1, Cond);
4415 
4416   // Peek through an 'or' of a rotated value compared against 0:
4417   // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4418   // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4419   //
4420   // TODO: Add the 'and' with -1 sibling.
4421   // TODO: Recurse through a series of 'or' ops to find the rotate.
4422   EVT OpVT = N0.getValueType();
4423   if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4424     if (SDValue R = getRotateSource(N0.getOperand(0))) {
4425       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4426       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4427     }
4428     if (SDValue R = getRotateSource(N0.getOperand(1))) {
4429       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4430       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4431     }
4432   }
4433 
4434   return SDValue();
4435 }
4436 
/// Try to simplify an equality comparison of a funnel shift against zero.
/// When one funnel-shift input is an 'or' of the other input, the test for
/// all-bits-clear can be done with a single plain shift plus an 'or', since
/// bit order does not matter for a compare with zero. Returns an empty
/// SDValue if the pattern does not match.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  // Only a compare against a splatted zero qualifies.
  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  // The shift amount must be a constant strictly less than the bit width.
  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  // fshr X, Y, C is equivalent to fshl X, Y, (BitWidth - C).
  unsigned ShAmt = ShAmtC->getZExtValue();
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  // On success, X is the operand shared with 'Other' and Y is the other one.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(0) == Other) {
      X = Or.getOperand(0);
      Y = Or.getOperand(1);
      return true;
    }
    if (Or.getOperand(1) == Other) {
      X = Or.getOperand(1);
      Y = Or.getOperand(0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(2).getValueType();
  SDValue F0 = N0.getOperand(0);
  SDValue F1 = N0.getOperand(1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }

  return SDValue();
}
4502 
4503 /// Try to simplify a setcc built with the specified operands and cc. If it is
4504 /// unable to simplify it, return a null SDValue.
4505 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4506                                       ISD::CondCode Cond, bool foldBooleans,
4507                                       DAGCombinerInfo &DCI,
4508                                       const SDLoc &dl) const {
4509   SelectionDAG &DAG = DCI.DAG;
4510   const DataLayout &Layout = DAG.getDataLayout();
4511   EVT OpVT = N0.getValueType();
4512   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4513 
4514   // Constant fold or commute setcc.
4515   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4516     return Fold;
4517 
4518   bool N0ConstOrSplat =
4519       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4520   bool N1ConstOrSplat =
4521       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4522 
4523   // Canonicalize toward having the constant on the RHS.
4524   // TODO: Handle non-splat vector constants. All undef causes trouble.
4525   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4526   // infinite loop here when we encounter one.
4527   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4528   if (N0ConstOrSplat && !N1ConstOrSplat &&
4529       (DCI.isBeforeLegalizeOps() ||
4530        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4531     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4532 
4533   // If we have a subtract with the same 2 non-constant operands as this setcc
4534   // -- but in reverse order -- then try to commute the operands of this setcc
4535   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4536   // instruction on some targets.
4537   if (!N0ConstOrSplat && !N1ConstOrSplat &&
4538       (DCI.isBeforeLegalizeOps() ||
4539        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4540       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4541       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4542     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4543 
4544   if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4545     return V;
4546 
4547   if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4548     return V;
4549 
4550   if (auto *N1C = isConstOrConstSplat(N1)) {
4551     const APInt &C1 = N1C->getAPIntValue();
4552 
4553     // Optimize some CTPOP cases.
4554     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4555       return V;
4556 
4557     // For equality to 0 of a no-wrap multiply, decompose and test each op:
4558     // X * Y == 0 --> (X == 0) || (Y == 0)
4559     // X * Y != 0 --> (X != 0) && (Y != 0)
4560     // TODO: This bails out if minsize is set, but if the target doesn't have a
4561     //       single instruction multiply for this type, it would likely be
4562     //       smaller to decompose.
4563     if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4564         N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4565         (N0->getFlags().hasNoUnsignedWrap() ||
4566          N0->getFlags().hasNoSignedWrap()) &&
4567         !Attr.hasFnAttr(Attribute::MinSize)) {
4568       SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4569       SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4570       unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4571       return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4572     }
4573 
4574     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4575     // equality comparison, then we're just comparing whether X itself is
4576     // zero.
4577     if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4578         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4579         llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4580       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4581         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4582             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4583           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4584             // (srl (ctlz x), 5) == 0  -> X != 0
4585             // (srl (ctlz x), 5) != 1  -> X != 0
4586             Cond = ISD::SETNE;
4587           } else {
4588             // (srl (ctlz x), 5) != 0  -> X == 0
4589             // (srl (ctlz x), 5) == 1  -> X == 0
4590             Cond = ISD::SETEQ;
4591           }
4592           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4593           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4594                               Cond);
4595         }
4596       }
4597     }
4598   }
4599 
4600   // FIXME: Support vectors.
4601   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4602     const APInt &C1 = N1C->getAPIntValue();
4603 
4604     // (zext x) == C --> x == (trunc C)
4605     // (sext x) == C --> x == (trunc C)
4606     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4607         DCI.isBeforeLegalize() && N0->hasOneUse()) {
4608       unsigned MinBits = N0.getValueSizeInBits();
4609       SDValue PreExt;
4610       bool Signed = false;
4611       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4612         // ZExt
4613         MinBits = N0->getOperand(0).getValueSizeInBits();
4614         PreExt = N0->getOperand(0);
4615       } else if (N0->getOpcode() == ISD::AND) {
4616         // DAGCombine turns costly ZExts into ANDs
4617         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4618           if ((C->getAPIntValue()+1).isPowerOf2()) {
4619             MinBits = C->getAPIntValue().countr_one();
4620             PreExt = N0->getOperand(0);
4621           }
4622       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4623         // SExt
4624         MinBits = N0->getOperand(0).getValueSizeInBits();
4625         PreExt = N0->getOperand(0);
4626         Signed = true;
4627       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4628         // ZEXTLOAD / SEXTLOAD
4629         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4630           MinBits = LN0->getMemoryVT().getSizeInBits();
4631           PreExt = N0;
4632         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4633           Signed = true;
4634           MinBits = LN0->getMemoryVT().getSizeInBits();
4635           PreExt = N0;
4636         }
4637       }
4638 
4639       // Figure out how many bits we need to preserve this constant.
4640       unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4641 
4642       // Make sure we're not losing bits from the constant.
4643       if (MinBits > 0 &&
4644           MinBits < C1.getBitWidth() &&
4645           MinBits >= ReqdBits) {
4646         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4647         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4648           // Will get folded away.
4649           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4650           if (MinBits == 1 && C1 == 1)
4651             // Invert the condition.
4652             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4653                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4654           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4655           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4656         }
4657 
4658         // If truncating the setcc operands is not desirable, we can still
4659         // simplify the expression in some cases:
4660         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4661         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4662         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4663         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4664         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4665         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4666         SDValue TopSetCC = N0->getOperand(0);
4667         unsigned N0Opc = N0->getOpcode();
4668         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4669         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4670             TopSetCC.getOpcode() == ISD::SETCC &&
4671             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4672             (isConstFalseVal(N1) ||
4673              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4674 
4675           bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4676                          (!N1C->isZero() && Cond == ISD::SETNE);
4677 
4678           if (!Inverse)
4679             return TopSetCC;
4680 
4681           ISD::CondCode InvCond = ISD::getSetCCInverse(
4682               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4683               TopSetCC.getOperand(0).getValueType());
4684           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4685                                       TopSetCC.getOperand(1),
4686                                       InvCond);
4687         }
4688       }
4689     }
4690 
4691     // If the LHS is '(and load, const)', the RHS is 0, the test is for
4692     // equality or unsigned, and all 1 bits of the const are in the same
4693     // partial word, see if we can shorten the load.
4694     if (DCI.isBeforeLegalize() &&
4695         !ISD::isSignedIntSetCC(Cond) &&
4696         N0.getOpcode() == ISD::AND && C1 == 0 &&
4697         N0.getNode()->hasOneUse() &&
4698         isa<LoadSDNode>(N0.getOperand(0)) &&
4699         N0.getOperand(0).getNode()->hasOneUse() &&
4700         isa<ConstantSDNode>(N0.getOperand(1))) {
4701       auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4702       APInt bestMask;
4703       unsigned bestWidth = 0, bestOffset = 0;
4704       if (Lod->isSimple() && Lod->isUnindexed() &&
4705           (Lod->getMemoryVT().isByteSized() ||
4706            isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4707         unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4708         unsigned origWidth = N0.getValueSizeInBits();
4709         unsigned maskWidth = origWidth;
4710         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4711         // 8 bits, but have to be careful...
4712         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4713           origWidth = Lod->getMemoryVT().getSizeInBits();
4714         const APInt &Mask = N0.getConstantOperandAPInt(1);
4715         // Only consider power-of-2 widths (and at least one byte) as candiates
4716         // for the narrowed load.
4717         for (unsigned width = 8; width < origWidth; width *= 2) {
4718           EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4719           if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4720             continue;
4721           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4722           // Avoid accessing any padding here for now (we could use memWidth
4723           // instead of origWidth here otherwise).
4724           unsigned maxOffset = origWidth - width;
4725           for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4726             if (Mask.isSubsetOf(newMask)) {
4727               unsigned ptrOffset =
4728                   Layout.isLittleEndian() ? offset : memWidth - width - offset;
4729               unsigned IsFast = 0;
4730               Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4731               if (allowsMemoryAccess(
4732                       *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4733                       NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4734                   IsFast) {
4735                 bestOffset = ptrOffset / 8;
4736                 bestMask = Mask.lshr(offset);
4737                 bestWidth = width;
4738                 break;
4739               }
4740             }
4741             newMask <<= 8;
4742           }
4743           if (bestWidth)
4744             break;
4745         }
4746       }
4747       if (bestWidth) {
4748         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4749         SDValue Ptr = Lod->getBasePtr();
4750         if (bestOffset != 0)
4751           Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4752         SDValue NewLoad =
4753             DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4754                         Lod->getPointerInfo().getWithOffset(bestOffset),
4755                         Lod->getOriginalAlign());
4756         SDValue And =
4757             DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4758                         DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4759         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4760       }
4761     }
4762 
4763     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4764     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4765       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4766 
4767       // If the comparison constant has bits in the upper part, the
4768       // zero-extended value could never match.
4769       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4770                                               C1.getBitWidth() - InSize))) {
4771         switch (Cond) {
4772         case ISD::SETUGT:
4773         case ISD::SETUGE:
4774         case ISD::SETEQ:
4775           return DAG.getConstant(0, dl, VT);
4776         case ISD::SETULT:
4777         case ISD::SETULE:
4778         case ISD::SETNE:
4779           return DAG.getConstant(1, dl, VT);
4780         case ISD::SETGT:
4781         case ISD::SETGE:
4782           // True if the sign bit of C1 is set.
4783           return DAG.getConstant(C1.isNegative(), dl, VT);
4784         case ISD::SETLT:
4785         case ISD::SETLE:
4786           // True if the sign bit of C1 isn't set.
4787           return DAG.getConstant(C1.isNonNegative(), dl, VT);
4788         default:
4789           break;
4790         }
4791       }
4792 
4793       // Otherwise, we can perform the comparison with the low bits.
4794       switch (Cond) {
4795       case ISD::SETEQ:
4796       case ISD::SETNE:
4797       case ISD::SETUGT:
4798       case ISD::SETUGE:
4799       case ISD::SETULT:
4800       case ISD::SETULE: {
4801         EVT newVT = N0.getOperand(0).getValueType();
4802         // FIXME: Should use isNarrowingProfitable.
4803         if (DCI.isBeforeLegalizeOps() ||
4804             (isOperationLegal(ISD::SETCC, newVT) &&
4805              isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4806              isTypeDesirableForOp(ISD::SETCC, newVT))) {
4807           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4808           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4809 
4810           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4811                                           NewConst, Cond);
4812           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4813         }
4814         break;
4815       }
4816       default:
4817         break; // todo, be more careful with signed comparisons
4818       }
4819     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4820                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4821                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4822                                       OpVT)) {
4823       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4824       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4825       EVT ExtDstTy = N0.getValueType();
4826       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4827 
4828       // If the constant doesn't fit into the number of bits for the source of
4829       // the sign extension, it is impossible for both sides to be equal.
4830       if (C1.getSignificantBits() > ExtSrcTyBits)
4831         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4832 
4833       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4834              ExtDstTy != ExtSrcTy && "Unexpected types!");
4835       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4836       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4837                                    DAG.getConstant(Imm, dl, ExtDstTy));
4838       if (!DCI.isCalledByLegalizer())
4839         DCI.AddToWorklist(ZextOp.getNode());
4840       // Otherwise, make this a use of a zext.
4841       return DAG.getSetCC(dl, VT, ZextOp,
4842                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4843     } else if ((N1C->isZero() || N1C->isOne()) &&
4844                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4845       // SETCC (X), [0|1], [EQ|NE]  -> X if X is known 0/1. i1 types are
4846       // excluded as they are handled below whilst checking for foldBooleans.
4847       if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4848           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4849           (N0.getValueType() == MVT::i1 ||
4850            getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4851           DAG.MaskedValueIsZero(
4852               N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4853         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4854         if (TrueWhenTrue)
4855           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4856         // Invert the condition.
4857         if (N0.getOpcode() == ISD::SETCC) {
4858           ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4859           CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4860           if (DCI.isBeforeLegalizeOps() ||
4861               isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4862             return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4863         }
4864       }
4865 
4866       if ((N0.getOpcode() == ISD::XOR ||
4867            (N0.getOpcode() == ISD::AND &&
4868             N0.getOperand(0).getOpcode() == ISD::XOR &&
4869             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4870           isOneConstant(N0.getOperand(1))) {
4871         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4872         // can only do this if the top bits are known zero.
4873         unsigned BitWidth = N0.getValueSizeInBits();
4874         if (DAG.MaskedValueIsZero(N0,
4875                                   APInt::getHighBitsSet(BitWidth,
4876                                                         BitWidth-1))) {
4877           // Okay, get the un-inverted input value.
4878           SDValue Val;
4879           if (N0.getOpcode() == ISD::XOR) {
4880             Val = N0.getOperand(0);
4881           } else {
4882             assert(N0.getOpcode() == ISD::AND &&
4883                     N0.getOperand(0).getOpcode() == ISD::XOR);
4884             // ((X^1)&1)^1 -> X & 1
4885             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4886                               N0.getOperand(0).getOperand(0),
4887                               N0.getOperand(1));
4888           }
4889 
4890           return DAG.getSetCC(dl, VT, Val, N1,
4891                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4892         }
4893       } else if (N1C->isOne()) {
4894         SDValue Op0 = N0;
4895         if (Op0.getOpcode() == ISD::TRUNCATE)
4896           Op0 = Op0.getOperand(0);
4897 
4898         if ((Op0.getOpcode() == ISD::XOR) &&
4899             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4900             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4901           SDValue XorLHS = Op0.getOperand(0);
4902           SDValue XorRHS = Op0.getOperand(1);
4903           // Ensure that the input setccs return an i1 type or 0/1 value.
4904           if (Op0.getValueType() == MVT::i1 ||
4905               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4906                       ZeroOrOneBooleanContent &&
4907                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4908                         ZeroOrOneBooleanContent)) {
4909             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4910             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4911             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4912           }
4913         }
4914         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4915           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4916           if (Op0.getValueType().bitsGT(VT))
4917             Op0 = DAG.getNode(ISD::AND, dl, VT,
4918                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4919                           DAG.getConstant(1, dl, VT));
4920           else if (Op0.getValueType().bitsLT(VT))
4921             Op0 = DAG.getNode(ISD::AND, dl, VT,
4922                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4923                         DAG.getConstant(1, dl, VT));
4924 
4925           return DAG.getSetCC(dl, VT, Op0,
4926                               DAG.getConstant(0, dl, Op0.getValueType()),
4927                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4928         }
4929         if (Op0.getOpcode() == ISD::AssertZext &&
4930             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4931           return DAG.getSetCC(dl, VT, Op0,
4932                               DAG.getConstant(0, dl, Op0.getValueType()),
4933                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4934       }
4935     }
4936 
4937     // Given:
4938     //   icmp eq/ne (urem %x, %y), 0
4939     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4940     //   icmp eq/ne %x, 0
4941     if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4942         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4943       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4944       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4945       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4946         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4947     }
4948 
4949     // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4950     //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4951     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4952         N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4953         N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4954         N1C->isAllOnes()) {
4955       return DAG.getSetCC(dl, VT, N0.getOperand(0),
4956                           DAG.getConstant(0, dl, OpVT),
4957                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4958     }
4959 
4960     if (SDValue V =
4961             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4962       return V;
4963   }
4964 
4965   // These simplifications apply to splat vectors as well.
4966   // TODO: Handle more splat vector cases.
4967   if (auto *N1C = isConstOrConstSplat(N1)) {
4968     const APInt &C1 = N1C->getAPIntValue();
4969 
4970     APInt MinVal, MaxVal;
4971     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4972     if (ISD::isSignedIntSetCC(Cond)) {
4973       MinVal = APInt::getSignedMinValue(OperandBitSize);
4974       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4975     } else {
4976       MinVal = APInt::getMinValue(OperandBitSize);
4977       MaxVal = APInt::getMaxValue(OperandBitSize);
4978     }
4979 
4980     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4981     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4982       // X >= MIN --> true
4983       if (C1 == MinVal)
4984         return DAG.getBoolConstant(true, dl, VT, OpVT);
4985 
4986       if (!VT.isVector()) { // TODO: Support this for vectors.
4987         // X >= C0 --> X > (C0 - 1)
4988         APInt C = C1 - 1;
4989         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4990         if ((DCI.isBeforeLegalizeOps() ||
4991              isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
4992             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4993                                   isLegalICmpImmediate(C.getSExtValue())))) {
4994           return DAG.getSetCC(dl, VT, N0,
4995                               DAG.getConstant(C, dl, N1.getValueType()),
4996                               NewCC);
4997         }
4998       }
4999     }
5000 
5001     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5002       // X <= MAX --> true
5003       if (C1 == MaxVal)
5004         return DAG.getBoolConstant(true, dl, VT, OpVT);
5005 
5006       // X <= C0 --> X < (C0 + 1)
5007       if (!VT.isVector()) { // TODO: Support this for vectors.
5008         APInt C = C1 + 1;
5009         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5010         if ((DCI.isBeforeLegalizeOps() ||
5011              isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5012             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5013                                   isLegalICmpImmediate(C.getSExtValue())))) {
5014           return DAG.getSetCC(dl, VT, N0,
5015                               DAG.getConstant(C, dl, N1.getValueType()),
5016                               NewCC);
5017         }
5018       }
5019     }
5020 
5021     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5022       if (C1 == MinVal)
5023         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5024 
5025       // TODO: Support this for vectors after legalize ops.
5026       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5027         // Canonicalize setlt X, Max --> setne X, Max
5028         if (C1 == MaxVal)
5029           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5030 
5031         // If we have setult X, 1, turn it into seteq X, 0
5032         if (C1 == MinVal+1)
5033           return DAG.getSetCC(dl, VT, N0,
5034                               DAG.getConstant(MinVal, dl, N0.getValueType()),
5035                               ISD::SETEQ);
5036       }
5037     }
5038 
5039     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5040       if (C1 == MaxVal)
5041         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5042 
5043       // TODO: Support this for vectors after legalize ops.
5044       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5045         // Canonicalize setgt X, Min --> setne X, Min
5046         if (C1 == MinVal)
5047           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5048 
5049         // If we have setugt X, Max-1, turn it into seteq X, Max
5050         if (C1 == MaxVal-1)
5051           return DAG.getSetCC(dl, VT, N0,
5052                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
5053                               ISD::SETEQ);
5054       }
5055     }
5056 
5057     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5058       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
5059       if (C1.isZero())
5060         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5061                 VT, N0, N1, Cond, DCI, dl))
5062           return CC;
5063 
5064       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5065       // For example, when high 32-bits of i64 X are known clear:
5066       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
5067       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
5068       bool CmpZero = N1C->isZero();
5069       bool CmpNegOne = N1C->isAllOnes();
5070       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5071         // Match or(lo,shl(hi,bw/2)) pattern.
5072         auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5073           unsigned EltBits = V.getScalarValueSizeInBits();
5074           if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5075             return false;
5076           SDValue LHS = V.getOperand(0);
5077           SDValue RHS = V.getOperand(1);
5078           APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5079           // Unshifted element must have zero upperbits.
5080           if (RHS.getOpcode() == ISD::SHL &&
5081               isa<ConstantSDNode>(RHS.getOperand(1)) &&
5082               RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5083               DAG.MaskedValueIsZero(LHS, HiBits)) {
5084             Lo = LHS;
5085             Hi = RHS.getOperand(0);
5086             return true;
5087           }
5088           if (LHS.getOpcode() == ISD::SHL &&
5089               isa<ConstantSDNode>(LHS.getOperand(1)) &&
5090               LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5091               DAG.MaskedValueIsZero(RHS, HiBits)) {
5092             Lo = RHS;
5093             Hi = LHS.getOperand(0);
5094             return true;
5095           }
5096           return false;
5097         };
5098 
5099         auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5100           unsigned EltBits = N0.getScalarValueSizeInBits();
5101           unsigned HalfBits = EltBits / 2;
5102           APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5103           SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5104           SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5105           SDValue NewN0 =
5106               DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5107           SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5108           return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5109         };
5110 
5111         SDValue Lo, Hi;
5112         if (IsConcat(N0, Lo, Hi))
5113           return MergeConcat(Lo, Hi);
5114 
5115         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5116           SDValue Lo0, Lo1, Hi0, Hi1;
5117           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5118               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5119             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5120                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5121           }
5122         }
5123       }
5124     }
5125 
5126     // If we have "setcc X, C0", check to see if we can shrink the immediate
5127     // by changing cc.
5128     // TODO: Support this for vectors after legalize ops.
5129     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5130       // SETUGT X, SINTMAX  -> SETLT X, 0
5131       // SETUGE X, SINTMIN -> SETLT X, 0
5132       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5133           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5134         return DAG.getSetCC(dl, VT, N0,
5135                             DAG.getConstant(0, dl, N1.getValueType()),
5136                             ISD::SETLT);
5137 
5138       // SETULT X, SINTMIN  -> SETGT X, -1
5139       // SETULE X, SINTMAX  -> SETGT X, -1
5140       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5141           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5142         return DAG.getSetCC(dl, VT, N0,
5143                             DAG.getAllOnesConstant(dl, N1.getValueType()),
5144                             ISD::SETGT);
5145     }
5146   }
5147 
5148   // Back to non-vector simplifications.
5149   // TODO: Can we do these for vector splats?
5150   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5151     const APInt &C1 = N1C->getAPIntValue();
5152     EVT ShValTy = N0.getValueType();
5153 
5154     // Fold bit comparisons when we can. This will result in an
5155     // incorrect value when boolean false is negative one, unless
5156     // the bitsize is 1 in which case the false value is the same
5157     // in practice regardless of the representation.
5158     if ((VT.getSizeInBits() == 1 ||
5159          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5160         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5161         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5162         N0.getOpcode() == ISD::AND) {
5163       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5164         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
5165           // Perform the xform if the AND RHS is a single bit.
5166           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5167           if (AndRHS->getAPIntValue().isPowerOf2() &&
5168               !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5169             return DAG.getNode(
5170                 ISD::TRUNCATE, dl, VT,
5171                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5172                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5173           }
5174         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5175           // (X & 8) == 8  -->  (X & 8) >> 3
5176           // Perform the xform if C1 is a single bit.
5177           unsigned ShCt = C1.logBase2();
5178           if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5179             return DAG.getNode(
5180                 ISD::TRUNCATE, dl, VT,
5181                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5182                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5183           }
5184         }
5185       }
5186     }
5187 
5188     if (C1.getSignificantBits() <= 64 &&
5189         !isLegalICmpImmediate(C1.getSExtValue())) {
5190       // (X & -256) == 256 -> (X >> 8) == 1
5191       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5192           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5193         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5194           const APInt &AndRHSC = AndRHS->getAPIntValue();
5195           if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5196             unsigned ShiftBits = AndRHSC.countr_zero();
5197             if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5198               SDValue Shift = DAG.getNode(
5199                   ISD::SRL, dl, ShValTy, N0.getOperand(0),
5200                   DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5201               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5202               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5203             }
5204           }
5205         }
5206       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5207                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5208         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5209         // X <  0x100000000 -> (X >> 32) <  1
5210         // X >= 0x100000000 -> (X >> 32) >= 1
5211         // X <= 0x0ffffffff -> (X >> 32) <  1
5212         // X >  0x0ffffffff -> (X >> 32) >= 1
5213         unsigned ShiftBits;
5214         APInt NewC = C1;
5215         ISD::CondCode NewCond = Cond;
5216         if (AdjOne) {
5217           ShiftBits = C1.countr_one();
5218           NewC = NewC + 1;
5219           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5220         } else {
5221           ShiftBits = C1.countr_zero();
5222         }
5223         NewC.lshrInPlace(ShiftBits);
5224         if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5225             isLegalICmpImmediate(NewC.getSExtValue()) &&
5226             !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5227           SDValue Shift =
5228               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5229                           DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5230           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5231           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5232         }
5233       }
5234     }
5235   }
5236 
5237   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5238     auto *CFP = cast<ConstantFPSDNode>(N1);
5239     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5240 
5241     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
5242     // constant if knowing that the operand is non-nan is enough.  We prefer to
5243     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5244     // materialize 0.0.
5245     if (Cond == ISD::SETO || Cond == ISD::SETUO)
5246       return DAG.getSetCC(dl, VT, N0, N0, Cond);
5247 
5248     // setcc (fneg x), C -> setcc swap(pred) x, -C
5249     if (N0.getOpcode() == ISD::FNEG) {
5250       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5251       if (DCI.isBeforeLegalizeOps() ||
5252           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5253         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5254         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5255       }
5256     }
5257 
5258     // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5259     if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5260         !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5261       bool IsFabs = N0.getOpcode() == ISD::FABS;
5262       SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5263       if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5264         FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5265                                              : (IsFabs ? fcInf : fcPosInf);
5266         if (Cond == ISD::SETUEQ)
5267           Flag |= fcNan;
5268         return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5269                            DAG.getTargetConstant(Flag, dl, MVT::i32));
5270       }
5271     }
5272 
5273     // If the condition is not legal, see if we can find an equivalent one
5274     // which is legal.
5275     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5276       // If the comparison was an awkward floating-point == or != and one of
5277       // the comparison operands is infinity or negative infinity, convert the
5278       // condition to a less-awkward <= or >=.
5279       if (CFP->getValueAPF().isInfinity()) {
5280         bool IsNegInf = CFP->getValueAPF().isNegative();
5281         ISD::CondCode NewCond = ISD::SETCC_INVALID;
5282         switch (Cond) {
5283         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5284         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5285         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5286         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5287         default: break;
5288         }
5289         if (NewCond != ISD::SETCC_INVALID &&
5290             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5291           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5292       }
5293     }
5294   }
5295 
5296   if (N0 == N1) {
5297     // The sext(setcc()) => setcc() optimization relies on the appropriate
5298     // constant being emitted.
5299     assert(!N0.getValueType().isInteger() &&
5300            "Integer types should be handled by FoldSetCC");
5301 
5302     bool EqTrue = ISD::isTrueWhenEqual(Cond);
5303     unsigned UOF = ISD::getUnorderedFlavor(Cond);
5304     if (UOF == 2) // FP operators that are undefined on NaNs.
5305       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5306     if (UOF == unsigned(EqTrue))
5307       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5308     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
5309     // if it is not already.
5310     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5311     if (NewCond != Cond &&
5312         (DCI.isBeforeLegalizeOps() ||
5313                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5314       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5315   }
5316 
5317   // ~X > ~Y --> Y > X
5318   // ~X < ~Y --> Y < X
5319   // ~X < C --> X > ~C
5320   // ~X > C --> X < ~C
5321   if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5322       N0.getValueType().isInteger()) {
5323     if (isBitwiseNot(N0)) {
5324       if (isBitwiseNot(N1))
5325         return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5326 
5327       if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5328           !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5329         SDValue Not = DAG.getNOT(dl, N1, OpVT);
5330         return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5331       }
5332     }
5333   }
5334 
5335   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5336       N0.getValueType().isInteger()) {
5337     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5338         N0.getOpcode() == ISD::XOR) {
5339       // Simplify (X+Y) == (X+Z) -->  Y == Z
5340       if (N0.getOpcode() == N1.getOpcode()) {
5341         if (N0.getOperand(0) == N1.getOperand(0))
5342           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5343         if (N0.getOperand(1) == N1.getOperand(1))
5344           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5345         if (isCommutativeBinOp(N0.getOpcode())) {
5346           // If X op Y == Y op X, try other combinations.
5347           if (N0.getOperand(0) == N1.getOperand(1))
5348             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5349                                 Cond);
5350           if (N0.getOperand(1) == N1.getOperand(0))
5351             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5352                                 Cond);
5353         }
5354       }
5355 
5356       // If RHS is a legal immediate value for a compare instruction, we need
5357       // to be careful about increasing register pressure needlessly.
5358       bool LegalRHSImm = false;
5359 
5360       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5361         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5362           // Turn (X+C1) == C2 --> X == C2-C1
5363           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5364             return DAG.getSetCC(
5365                 dl, VT, N0.getOperand(0),
5366                 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5367                                 dl, N0.getValueType()),
5368                 Cond);
5369 
5370           // Turn (X^C1) == C2 --> X == C1^C2
5371           if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5372             return DAG.getSetCC(
5373                 dl, VT, N0.getOperand(0),
5374                 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5375                                 dl, N0.getValueType()),
5376                 Cond);
5377         }
5378 
5379         // Turn (C1-X) == C2 --> X == C1-C2
5380         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5381           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5382             return DAG.getSetCC(
5383                 dl, VT, N0.getOperand(1),
5384                 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5385                                 dl, N0.getValueType()),
5386                 Cond);
5387 
5388         // Could RHSC fold directly into a compare?
5389         if (RHSC->getValueType(0).getSizeInBits() <= 64)
5390           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5391       }
5392 
5393       // (X+Y) == X --> Y == 0 and similar folds.
5394       // Don't do this if X is an immediate that can fold into a cmp
5395       // instruction and X+Y has other uses. It could be an induction variable
5396       // chain, and the transform would increase register pressure.
5397       if (!LegalRHSImm || N0.hasOneUse())
5398         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5399           return V;
5400     }
5401 
5402     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5403         N1.getOpcode() == ISD::XOR)
5404       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5405         return V;
5406 
5407     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5408       return V;
5409   }
5410 
5411   // Fold remainder of division by a constant.
5412   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5413       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5414     // When division is cheap or optimizing for minimum size,
5415     // fall through to DIVREM creation by skipping this fold.
5416     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5417       if (N0.getOpcode() == ISD::UREM) {
5418         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5419           return Folded;
5420       } else if (N0.getOpcode() == ISD::SREM) {
5421         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5422           return Folded;
5423       }
5424     }
5425   }
5426 
5427   // Fold away ALL boolean setcc's.
5428   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5429     SDValue Temp;
5430     switch (Cond) {
5431     default: llvm_unreachable("Unknown integer setcc!");
5432     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
5433       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5434       N0 = DAG.getNOT(dl, Temp, OpVT);
5435       if (!DCI.isCalledByLegalizer())
5436         DCI.AddToWorklist(Temp.getNode());
5437       break;
5438     case ISD::SETNE:  // X != Y   -->  (X^Y)
5439       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5440       break;
5441     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
5442     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
5443       Temp = DAG.getNOT(dl, N0, OpVT);
5444       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5445       if (!DCI.isCalledByLegalizer())
5446         DCI.AddToWorklist(Temp.getNode());
5447       break;
5448     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
5449     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
5450       Temp = DAG.getNOT(dl, N1, OpVT);
5451       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5452       if (!DCI.isCalledByLegalizer())
5453         DCI.AddToWorklist(Temp.getNode());
5454       break;
5455     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
5456     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
5457       Temp = DAG.getNOT(dl, N0, OpVT);
5458       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5459       if (!DCI.isCalledByLegalizer())
5460         DCI.AddToWorklist(Temp.getNode());
5461       break;
5462     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
5463     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
5464       Temp = DAG.getNOT(dl, N1, OpVT);
5465       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5466       break;
5467     }
5468     if (VT.getScalarType() != MVT::i1) {
5469       if (!DCI.isCalledByLegalizer())
5470         DCI.AddToWorklist(N0.getNode());
5471       // FIXME: If running after legalize, we probably can't do this.
5472       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5473       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5474     }
5475     return N0;
5476   }
5477 
5478   // Could not fold it.
5479   return SDValue();
5480 }
5481 
5482 /// Returns true (and the GlobalValue and the offset) if the node is a
5483 /// GlobalAddress + offset.
5484 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5485                                     int64_t &Offset) const {
5486 
5487   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5488 
5489   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5490     GA = GASD->getGlobal();
5491     Offset += GASD->getOffset();
5492     return true;
5493   }
5494 
5495   if (N->getOpcode() == ISD::ADD) {
5496     SDValue N1 = N->getOperand(0);
5497     SDValue N2 = N->getOperand(1);
5498     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5499       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5500         Offset += V->getSExtValue();
5501         return true;
5502       }
5503     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5504       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5505         Offset += V->getSExtValue();
5506         return true;
5507       }
5508     }
5509   }
5510 
5511   return false;
5512 }
5513 
5514 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5515                                           DAGCombinerInfo &DCI) const {
5516   // Default implementation: no optimization.
5517   return SDValue();
5518 }
5519 
5520 //===----------------------------------------------------------------------===//
5521 //  Inline Assembler Implementation Methods
5522 //===----------------------------------------------------------------------===//
5523 
5524 TargetLowering::ConstraintType
5525 TargetLowering::getConstraintType(StringRef Constraint) const {
5526   unsigned S = Constraint.size();
5527 
5528   if (S == 1) {
5529     switch (Constraint[0]) {
5530     default: break;
5531     case 'r':
5532       return C_RegisterClass;
5533     case 'm': // memory
5534     case 'o': // offsetable
5535     case 'V': // not offsetable
5536       return C_Memory;
5537     case 'p': // Address.
5538       return C_Address;
5539     case 'n': // Simple Integer
5540     case 'E': // Floating Point Constant
5541     case 'F': // Floating Point Constant
5542       return C_Immediate;
5543     case 'i': // Simple Integer or Relocatable Constant
5544     case 's': // Relocatable Constant
5545     case 'X': // Allow ANY value.
5546     case 'I': // Target registers.
5547     case 'J':
5548     case 'K':
5549     case 'L':
5550     case 'M':
5551     case 'N':
5552     case 'O':
5553     case 'P':
5554     case '<':
5555     case '>':
5556       return C_Other;
5557     }
5558   }
5559 
5560   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5561     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5562       return C_Memory;
5563     return C_Register;
5564   }
5565   return C_Unknown;
5566 }
5567 
5568 /// Try to replace an X constraint, which matches anything, with another that
5569 /// has more specific requirements based on the type of the corresponding
5570 /// operand.
5571 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5572   if (ConstraintVT.isInteger())
5573     return "r";
5574   if (ConstraintVT.isFloatingPoint())
5575     return "f"; // works for many targets
5576   return nullptr;
5577 }
5578 
5579 SDValue TargetLowering::LowerAsmOutputForConstraint(
5580     SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5581     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5582   return SDValue();
5583 }
5584 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here; multi-letter ones are
  // target-specific and handled by overrides.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    ConstantSDNode *C;
    // Running sum of constants peeled off ADD/SUB chains below. Unsigned so
    // the arithmetic wraps with defined behavior; the final value is emitted
    // as a 64-bit constant or symbol offset.
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // Plain constant operand (not valid for 's', which requires a
      // relocatable symbol).
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        // i1 values are widened according to the target's boolean contents;
        // everything else is sign extended.
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic operands satisfy 'X', 'i' and 's', but not 'n' (which
      // demands a plain integer).
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          // Fold the accumulated offset into the target global address node.
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one constant off an ADD/SUB chain and keep walking toward the
      // symbol (or innermost constant).
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        // Negate the constant when it was the subtrahend side of a SUB.
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Not a recognized pattern; add nothing to Ops.
      return;
    }
    break;
  }
  }
}
5663 
// Default: no extra operands. Targets override this to append additional
// SDValues for their intrinsic calls.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5667 
5668 std::pair<unsigned, const TargetRegisterClass *>
5669 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5670                                              StringRef Constraint,
5671                                              MVT VT) const {
5672   if (!Constraint.starts_with("{"))
5673     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5674   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5675 
5676   // Remove the braces from around the name.
5677   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5678 
5679   std::pair<unsigned, const TargetRegisterClass *> R =
5680       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5681 
5682   // Figure out which register class contains this reg.
5683   for (const TargetRegisterClass *RC : RI->regclasses()) {
5684     // If none of the value types for this register class are valid, we
5685     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5686     if (!isLegalRC(*RI, *RC))
5687       continue;
5688 
5689     for (const MCPhysReg &PR : *RC) {
5690       if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5691         std::pair<unsigned, const TargetRegisterClass *> S =
5692             std::make_pair(PR, RC);
5693 
5694         // If this register class has the requested value type, return it,
5695         // otherwise keep searching and return the first class found
5696         // if no other is found which explicitly has the requested type.
5697         if (RI->isTypeLegalForClass(*RC, VT))
5698           return S;
5699         if (!R.second)
5700           R = S;
5701       }
5702     }
5703   }
5704 
5705   return R;
5706 }
5707 
5708 //===----------------------------------------------------------------------===//
5709 // Constraint Selection.
5710 
5711 /// Return true of this is an input operand that is a matching constraint like
5712 /// "4".
5713 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5714   assert(!ConstraintCode.empty() && "No known constraint!");
5715   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5716 }
5717 
5718 /// If this is an input matching constraint, this method returns the output
5719 /// operand it matches.
5720 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5721   assert(!ConstraintCode.empty() && "No known constraint!");
5722   return atoi(ConstraintCode.c_str());
5723 }
5724 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple direct outputs are returned as a struct; this output's
        // type is the corresponding struct element.
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, STy->getElementType(ResNo))
                .getSimpleVT();
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      // Labels consume no call arguments, so skip the operand-type logic
      // below (which would otherwise advance ArgNo).
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are pointers; the interesting type is the
        // pointee, recorded via the elementtype attribute.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One operand cannot be satisfied at all by this alternative,
            // so the whole alternative is invalid.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        // Tied operands with different VTs are accepted only when both are
        // integer-or-FP and resolve to the same register class.
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5915 
5916 /// Return a number indicating our preference for chosing a type of constraint
5917 /// over another, for the purpose of sorting them. Immediates are almost always
5918 /// preferrable (when they can be emitted). A higher return value means a
5919 /// stronger preference for one constraint type relative to another.
5920 /// FIXME: We should prefer registers over memory but doing so may lead to
5921 /// unrecoverable register exhaustion later.
5922 /// https://github.com/llvm/llvm-project/issues/20571
5923 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5924   switch (CT) {
5925   case TargetLowering::C_Immediate:
5926   case TargetLowering::C_Other:
5927     return 4;
5928   case TargetLowering::C_Memory:
5929   case TargetLowering::C_Address:
5930     return 3;
5931   case TargetLowering::C_RegisterClass:
5932     return 2;
5933   case TargetLowering::C_Register:
5934     return 1;
5935   case TargetLowering::C_Unknown:
5936     return 0;
5937   }
5938   llvm_unreachable("Invalid constraint type");
5939 }
5940 
5941 /// Examine constraint type and operand type and determine a weight value.
5942 /// This object must already have been set up with the operand type
5943 /// and the current alternative constraint selected.
5944 TargetLowering::ConstraintWeight
5945   TargetLowering::getMultipleConstraintMatchWeight(
5946     AsmOperandInfo &info, int maIndex) const {
5947   InlineAsm::ConstraintCodeVector *rCodes;
5948   if (maIndex >= (int)info.multipleAlternatives.size())
5949     rCodes = &info.Codes;
5950   else
5951     rCodes = &info.multipleAlternatives[maIndex].Codes;
5952   ConstraintWeight BestWeight = CW_Invalid;
5953 
5954   // Loop over the options, keeping track of the most general one.
5955   for (const std::string &rCode : *rCodes) {
5956     ConstraintWeight weight =
5957         getSingleConstraintMatchWeight(info, rCode.c_str());
5958     if (weight > BestWeight)
5959       BestWeight = weight;
5960   }
5961 
5962   return BestWeight;
5963 }
5964 
5965 /// Examine constraint type and operand type and determine a weight value.
5966 /// This object must already have been set up with the operand type
5967 /// and the current alternative constraint selected.
5968 TargetLowering::ConstraintWeight
5969   TargetLowering::getSingleConstraintMatchWeight(
5970     AsmOperandInfo &info, const char *constraint) const {
5971   ConstraintWeight weight = CW_Invalid;
5972   Value *CallOperandVal = info.CallOperandVal;
5973     // If we don't have a value, we can't do a match,
5974     // but allow it at the lowest weight.
5975   if (!CallOperandVal)
5976     return CW_Default;
5977   // Look at the constraint type.
5978   switch (*constraint) {
5979     case 'i': // immediate integer.
5980     case 'n': // immediate integer with a known value.
5981       if (isa<ConstantInt>(CallOperandVal))
5982         weight = CW_Constant;
5983       break;
5984     case 's': // non-explicit intregal immediate.
5985       if (isa<GlobalValue>(CallOperandVal))
5986         weight = CW_Constant;
5987       break;
5988     case 'E': // immediate float if host format.
5989     case 'F': // immediate float.
5990       if (isa<ConstantFP>(CallOperandVal))
5991         weight = CW_Constant;
5992       break;
5993     case '<': // memory operand with autodecrement.
5994     case '>': // memory operand with autoincrement.
5995     case 'm': // memory operand.
5996     case 'o': // offsettable memory operand
5997     case 'V': // non-offsettable memory operand
5998       weight = CW_Memory;
5999       break;
6000     case 'r': // general register.
6001     case 'g': // general register, memory operand or immediate integer.
6002               // note: Clang converts "g" to "imr".
6003       if (CallOperandVal->getType()->isIntegerTy())
6004         weight = CW_Register;
6005       break;
6006     case 'X': // any operand.
6007   default:
6008     weight = CW_Default;
6009     break;
6010   }
6011   return weight;
6012 }
6013 
6014 /// If there are multiple different constraints that we could pick for this
6015 /// operand (e.g. "imr") try to pick the 'best' one.
6016 /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6017 /// into seven classes:
6018 ///    Register      -> one specific register
6019 ///    RegisterClass -> a group of regs
6020 ///    Memory        -> memory
6021 ///    Address       -> a symbolic memory reference
6022 ///    Immediate     -> immediate values
6023 ///    Other         -> magic values (such as "Flag Output Operands")
6024 ///    Unknown       -> something we don't recognize yet and can't handle
6025 /// Ideally, we would pick the most specific constraint possible: if we have
6026 /// something that fits into a register, we would pick it.  The problem here
6027 /// is that if we have something that could either be in a register or in
6028 /// memory that use of the register could cause selection of *other*
6029 /// operands to fail: they might only succeed if we pick memory.  Because of
6030 /// this the heuristic we use is:
6031 ///
6032 ///  1) If there is an 'other' constraint, and if the operand is valid for
6033 ///     that constraint, use it.  This makes us take advantage of 'i'
6034 ///     constraints when available.
6035 ///  2) Otherwise, pick the most general constraint present.  This prefers
6036 ///     'm' over 'r', for example.
6037 ///
6038 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6039     TargetLowering::AsmOperandInfo &OpInfo) const {
6040   ConstraintGroup Ret;
6041 
6042   Ret.reserve(OpInfo.Codes.size());
6043   for (StringRef Code : OpInfo.Codes) {
6044     TargetLowering::ConstraintType CType = getConstraintType(Code);
6045 
6046     // Indirect 'other' or 'immediate' constraints are not allowed.
6047     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6048                                CType == TargetLowering::C_Register ||
6049                                CType == TargetLowering::C_RegisterClass))
6050       continue;
6051 
6052     // Things with matching constraints can only be registers, per gcc
6053     // documentation.  This mainly affects "g" constraints.
6054     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6055       continue;
6056 
6057     Ret.emplace_back(Code, CType);
6058   }
6059 
6060   std::stable_sort(
6061       Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6062         return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6063       });
6064 
6065   return Ret;
6066 }
6067 
6068 /// If we have an immediate, see if we can lower it. Return true if we can,
6069 /// false otherwise.
6070 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6071                                      SDValue Op, SelectionDAG *DAG,
6072                                      const TargetLowering &TLI) {
6073 
6074   assert((P.second == TargetLowering::C_Other ||
6075           P.second == TargetLowering::C_Immediate) &&
6076          "need immediate or other");
6077 
6078   if (!Op.getNode())
6079     return false;
6080 
6081   std::vector<SDValue> ResultOps;
6082   TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6083   return !ResultOps.empty();
6084 }
6085 
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    // Walk the preference-sorted group. Leading C_Other/C_Immediate entries
    // are only usable if the operand actually lowers as an immediate: take
    // the first that does; otherwise the loop stops at the first
    // non-immediate entry. If *every* entry is immediate-style and none
    // lowers, fall back to entry 0.
    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Basic blocks and block addresses are lowered as immediates.
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6143 
6144 /// Given an exact SDIV by a constant, create a multiplication
6145 /// with the multiplicative inverse of the constant.
6146 /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6147 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6148                               const SDLoc &dl, SelectionDAG &DAG,
6149                               SmallVectorImpl<SDNode *> &Created) {
6150   SDValue Op0 = N->getOperand(0);
6151   SDValue Op1 = N->getOperand(1);
6152   EVT VT = N->getValueType(0);
6153   EVT SVT = VT.getScalarType();
6154   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6155   EVT ShSVT = ShVT.getScalarType();
6156 
6157   bool UseSRA = false;
6158   SmallVector<SDValue, 16> Shifts, Factors;
6159 
6160   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6161     if (C->isZero())
6162       return false;
6163     APInt Divisor = C->getAPIntValue();
6164     unsigned Shift = Divisor.countr_zero();
6165     if (Shift) {
6166       Divisor.ashrInPlace(Shift);
6167       UseSRA = true;
6168     }
6169     APInt Factor = Divisor.multiplicativeInverse();
6170     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6171     Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6172     return true;
6173   };
6174 
6175   // Collect all magic values from the build vector.
6176   if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6177     return SDValue();
6178 
6179   SDValue Shift, Factor;
6180   if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6181     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6182     Factor = DAG.getBuildVector(VT, dl, Factors);
6183   } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6184     assert(Shifts.size() == 1 && Factors.size() == 1 &&
6185            "Expected matchUnaryPredicate to return one element for scalable "
6186            "vectors");
6187     Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6188     Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6189   } else {
6190     assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6191     Shift = Shifts[0];
6192     Factor = Factors[0];
6193   }
6194 
6195   SDValue Res = Op0;
6196   if (UseSRA) {
6197     Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6198     Created.push_back(Res.getNode());
6199   }
6200 
6201   return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6202 }
6203 
6204 /// Given an exact UDIV by a constant, create a multiplication
6205 /// with the multiplicative inverse of the constant.
6206 /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6207 static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6208                               const SDLoc &dl, SelectionDAG &DAG,
6209                               SmallVectorImpl<SDNode *> &Created) {
6210   EVT VT = N->getValueType(0);
6211   EVT SVT = VT.getScalarType();
6212   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6213   EVT ShSVT = ShVT.getScalarType();
6214 
6215   bool UseSRL = false;
6216   SmallVector<SDValue, 16> Shifts, Factors;
6217 
6218   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6219     if (C->isZero())
6220       return false;
6221     APInt Divisor = C->getAPIntValue();
6222     unsigned Shift = Divisor.countr_zero();
6223     if (Shift) {
6224       Divisor.lshrInPlace(Shift);
6225       UseSRL = true;
6226     }
6227     // Calculate the multiplicative inverse modulo BW.
6228     APInt Factor = Divisor.multiplicativeInverse();
6229     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6230     Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6231     return true;
6232   };
6233 
6234   SDValue Op1 = N->getOperand(1);
6235 
6236   // Collect all magic values from the build vector.
6237   if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6238     return SDValue();
6239 
6240   SDValue Shift, Factor;
6241   if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6242     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6243     Factor = DAG.getBuildVector(VT, dl, Factors);
6244   } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6245     assert(Shifts.size() == 1 && Factors.size() == 1 &&
6246            "Expected matchUnaryPredicate to return one element for scalable "
6247            "vectors");
6248     Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6249     Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6250   } else {
6251     assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6252     Shift = Shifts[0];
6253     Factor = Factors[0];
6254   }
6255 
6256   SDValue Res = N->getOperand(0);
6257   if (UseSRL) {
6258     Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6259     Created.push_back(Res.getNode());
6260   }
6261 
6262   return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6263 }
6264 
6265 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6266                               SelectionDAG &DAG,
6267                               SmallVectorImpl<SDNode *> &Created) const {
6268   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6269   if (isIntDivCheap(N->getValueType(0), Attr))
6270     return SDValue(N, 0); // Lower SDIV as SDIV
6271   return SDValue();
6272 }
6273 
6274 SDValue
6275 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6276                               SelectionDAG &DAG,
6277                               SmallVectorImpl<SDNode *> &Created) const {
6278   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6279   if (isIntDivCheap(N->getValueType(0), Attr))
6280     return SDValue(N, 0); // Lower SREM as SREM
6281   return SDValue();
6282 }
6283 
6284 /// Build sdiv by power-of-2 with conditional move instructions
6285 /// Ref: "Hacker's Delight" by Henry Warren 10-1
6286 /// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6287 ///   bgez x, label
6288 ///   add x, x, 2**k-1
6289 /// label:
6290 ///   sra res, x, k
6291 ///   neg res, res (when the divisor is negative)
6292 SDValue TargetLowering::buildSDIVPow2WithCMov(
6293     SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6294     SmallVectorImpl<SDNode *> &Created) const {
6295   unsigned Lg2 = Divisor.countr_zero();
6296   EVT VT = N->getValueType(0);
6297 
6298   SDLoc DL(N);
6299   SDValue N0 = N->getOperand(0);
6300   SDValue Zero = DAG.getConstant(0, DL, VT);
6301   APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6302   SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6303 
6304   // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6305   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6306   SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6307   SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6308   SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6309 
6310   Created.push_back(Cmp.getNode());
6311   Created.push_back(Add.getNode());
6312   Created.push_back(CMov.getNode());
6313 
6314   // Divide by pow2.
6315   SDValue SRA =
6316       DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6317 
6318   // If we're dividing by a positive value, we're done.  Otherwise, we must
6319   // negate the result.
6320   if (Divisor.isNonNegative())
6321     return SRA;
6322 
6323   Created.push_back(SRA.getNode());
6324   return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6325 }
6326 
6327 /// Given an ISD::SDIV node expressing a divide by constant,
6328 /// return a DAG expression to select that will generate the same value by
6329 /// multiplying by a magic number.
6330 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6331 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6332                                   bool IsAfterLegalization,
6333                                   bool IsAfterLegalTypes,
6334                                   SmallVectorImpl<SDNode *> &Created) const {
6335   SDLoc dl(N);
6336   EVT VT = N->getValueType(0);
6337   EVT SVT = VT.getScalarType();
6338   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6339   EVT ShSVT = ShVT.getScalarType();
6340   unsigned EltBits = VT.getScalarSizeInBits();
6341   EVT MulVT;
6342 
6343   // Check to see if we can do this.
6344   // FIXME: We should be more aggressive here.
6345   if (!isTypeLegal(VT)) {
6346     // Limit this to simple scalars for now.
6347     if (VT.isVector() || !VT.isSimple())
6348       return SDValue();
6349 
6350     // If this type will be promoted to a large enough type with a legal
6351     // multiply operation, we can go ahead and do this transform.
6352     if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6353       return SDValue();
6354 
6355     MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6356     if (MulVT.getSizeInBits() < (2 * EltBits) ||
6357         !isOperationLegal(ISD::MUL, MulVT))
6358       return SDValue();
6359   }
6360 
6361   // If the sdiv has an 'exact' bit we can use a simpler lowering.
6362   if (N->getFlags().hasExact())
6363     return BuildExactSDIV(*this, N, dl, DAG, Created);
6364 
6365   SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6366 
6367   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6368     if (C->isZero())
6369       return false;
6370 
6371     const APInt &Divisor = C->getAPIntValue();
6372     SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6373     int NumeratorFactor = 0;
6374     int ShiftMask = -1;
6375 
6376     if (Divisor.isOne() || Divisor.isAllOnes()) {
6377       // If d is +1/-1, we just multiply the numerator by +1/-1.
6378       NumeratorFactor = Divisor.getSExtValue();
6379       magics.Magic = 0;
6380       magics.ShiftAmount = 0;
6381       ShiftMask = 0;
6382     } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6383       // If d > 0 and m < 0, add the numerator.
6384       NumeratorFactor = 1;
6385     } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6386       // If d < 0 and m > 0, subtract the numerator.
6387       NumeratorFactor = -1;
6388     }
6389 
6390     MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6391     Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6392     Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6393     ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6394     return true;
6395   };
6396 
6397   SDValue N0 = N->getOperand(0);
6398   SDValue N1 = N->getOperand(1);
6399 
6400   // Collect the shifts / magic values from each element.
6401   if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6402     return SDValue();
6403 
6404   SDValue MagicFactor, Factor, Shift, ShiftMask;
6405   if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6406     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6407     Factor = DAG.getBuildVector(VT, dl, Factors);
6408     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6409     ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6410   } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6411     assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6412            Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6413            "Expected matchUnaryPredicate to return one element for scalable "
6414            "vectors");
6415     MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6416     Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6417     Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6418     ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6419   } else {
6420     assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6421     MagicFactor = MagicFactors[0];
6422     Factor = Factors[0];
6423     Shift = Shifts[0];
6424     ShiftMask = ShiftMasks[0];
6425   }
6426 
6427   // Multiply the numerator (operand 0) by the magic value.
6428   // FIXME: We should support doing a MUL in a wider type.
6429   auto GetMULHS = [&](SDValue X, SDValue Y) {
6430     // If the type isn't legal, use a wider mul of the type calculated
6431     // earlier.
6432     if (!isTypeLegal(VT)) {
6433       X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6434       Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6435       Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6436       Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6437                       DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6438       return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6439     }
6440 
6441     if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6442       return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6443     if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6444       SDValue LoHi =
6445           DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6446       return SDValue(LoHi.getNode(), 1);
6447     }
6448     // If type twice as wide legal, widen and use a mul plus a shift.
6449     unsigned Size = VT.getScalarSizeInBits();
6450     EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6451     if (VT.isVector())
6452       WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6453                                 VT.getVectorElementCount());
6454     // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6455     // custom lowered. This is very expensive so avoid it at all costs for
6456     // constant divisors.
6457     if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6458          isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
6459         isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6460       X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6461       Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6462       Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6463       Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6464                       DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6465       return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6466     }
6467     return SDValue();
6468   };
6469 
6470   SDValue Q = GetMULHS(N0, MagicFactor);
6471   if (!Q)
6472     return SDValue();
6473 
6474   Created.push_back(Q.getNode());
6475 
6476   // (Optionally) Add/subtract the numerator using Factor.
6477   Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6478   Created.push_back(Factor.getNode());
6479   Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6480   Created.push_back(Q.getNode());
6481 
6482   // Shift right algebraic by shift value.
6483   Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6484   Created.push_back(Q.getNode());
6485 
6486   // Extract the sign bit, mask it and add it to the quotient.
6487   SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6488   SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6489   Created.push_back(T.getNode());
6490   T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6491   Created.push_back(T.getNode());
6492   return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6493 }
6494 
6495 /// Given an ISD::UDIV node expressing a divide by constant,
6496 /// return a DAG expression to select that will generate the same value by
6497 /// multiplying by a magic number.
6498 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type to perform the multiply in when VT itself is not legal.
  // Only initialized on the promoted-integer path below.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be at least twice as wide so the full 2*EltBits
    // product fits, and must have a legal MUL.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  // UseNPQ: at least one lane needs the "add numerator" (NPQ) fixup.
  // UsePreShift/UsePostShift: at least one lane has a non-zero pre/post shift.
  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Per-element callback: compute the magic constants for one constant
  // divisor lane and append them to the vectors above. Returns false to
  // reject the whole fold (zero divisor).
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(
              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // NPQ lanes get 2^(EltBits-1) so that MULHU acts as an SRL-by-1;
      // non-NPQ lanes get zero (see the vector NPQ handling below).
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  // Assemble the per-lane constants into operands matching N1's form.
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    // Scalar case: NPQFactor is intentionally left unset — the scalar NPQ
    // path below uses a plain SRL-by-1 instead of a MULHU.
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // Returns the high half of the EltBits x EltBits unsigned product of X and
  // Y, using whichever of MULHU / UMUL_LOHI / widened MUL+SRL is available,
  // or an empty SDValue if none is.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of UMUL_LOHI is the high half.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
         isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    // NPQ fixup: Q = ((N0 - Q) >> 1) + Q, with the shift realized as a MULHU
    // by 2^(EltBits-1) in the vector case (see NPQFactor above).
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Divide-by-one lanes bypass the magic algorithm (their constants are
  // undef): select the unmodified numerator for them.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6694 
6695 /// If all values in Values that *don't* match the predicate are same 'splat'
6696 /// value, then replace all values with that splat value.
6697 /// Else, if AlternativeReplacement was provided, then replace all values that
6698 /// do match predicate with AlternativeReplacement value.
6699 static void
6700 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6701                           std::function<bool(SDValue)> Predicate,
6702                           SDValue AlternativeReplacement = SDValue()) {
6703   SDValue Replacement;
6704   // Is there a value for which the Predicate does *NOT* match? What is it?
6705   auto SplatValue = llvm::find_if_not(Values, Predicate);
6706   if (SplatValue != Values.end()) {
6707     // Does Values consist only of SplatValue's and values matching Predicate?
6708     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6709           return Value == *SplatValue || Predicate(Value);
6710         })) // Then we shall replace values matching predicate with SplatValue.
6711       Replacement = *SplatValue;
6712   }
6713   if (!Replacement) {
6714     // Oops, we did not find the "baseline" splat value.
6715     if (!AlternativeReplacement)
6716       return; // Nothing to do.
6717     // Let's replace with provided value then.
6718     Replacement = AlternativeReplacement;
6719   }
6720   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6721 }
6722 
6723 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6724 /// where the divisor is constant and the comparison target is zero,
6725 /// return a DAG expression that will generate the same comparison result
6726 /// using only multiplications, additions and shifts/rotations.
6727 /// Ref: "Hacker's Delight" 10-17.
6728 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6729                                         SDValue CompTargetNode,
6730                                         ISD::CondCode Cond,
6731                                         DAGCombinerInfo &DCI,
6732                                         const SDLoc &DL) const {
6733   SmallVector<SDNode *, 5> Built;
6734   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6735                                          DCI, DL, Built)) {
6736     for (SDNode *N : Built)
6737       DCI.AddToWorklist(N);
6738     return Folded;
6739   }
6740 
6741   return SDValue();
6742 }
6743 
6744 SDValue
6745 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6746                                   SDValue CompTargetNode, ISD::CondCode Cond,
6747                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6748                                   SmallVectorImpl<SDNode *> &Created) const {
6749   // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6750   // - D must be constant, with D = D0 * 2^K where D0 is odd
6751   // - P is the multiplicative inverse of D0 modulo 2^W
6752   // - Q = floor(((2^W) - 1) / D)
6753   // where W is the width of the common type of N and D.
6754   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6755          "Only applicable for (in)equality comparisons.");
6756 
6757   SelectionDAG &DAG = DCI.DAG;
6758 
6759   EVT VT = REMNode.getValueType();
6760   EVT SVT = VT.getScalarType();
6761   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6762   EVT ShSVT = ShVT.getScalarType();
6763 
6764   // If MUL is unavailable, we cannot proceed in any case.
6765   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6766     return SDValue();
6767 
6768   bool ComparingWithAllZeros = true;
6769   bool AllComparisonsWithNonZerosAreTautological = true;
6770   bool HadTautologicalLanes = false;
6771   bool AllLanesAreTautological = true;
6772   bool HadEvenDivisor = false;
6773   bool AllDivisorsArePowerOfTwo = true;
6774   bool HadTautologicalInvertedLanes = false;
6775   SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6776 
6777   auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6778     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6779     if (CDiv->isZero())
6780       return false;
6781 
6782     const APInt &D = CDiv->getAPIntValue();
6783     const APInt &Cmp = CCmp->getAPIntValue();
6784 
6785     ComparingWithAllZeros &= Cmp.isZero();
6786 
6787     // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6788     // if C2 is not less than C1, the comparison is always false.
6789     // But we will only be able to produce the comparison that will give the
6790     // opposive tautological answer. So this lane would need to be fixed up.
6791     bool TautologicalInvertedLane = D.ule(Cmp);
6792     HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6793 
6794     // If all lanes are tautological (either all divisors are ones, or divisor
6795     // is not greater than the constant we are comparing with),
6796     // we will prefer to avoid the fold.
6797     bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6798     HadTautologicalLanes |= TautologicalLane;
6799     AllLanesAreTautological &= TautologicalLane;
6800 
6801     // If we are comparing with non-zero, we need'll need  to subtract said
6802     // comparison value from the LHS. But there is no point in doing that if
6803     // every lane where we are comparing with non-zero is tautological..
6804     if (!Cmp.isZero())
6805       AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6806 
6807     // Decompose D into D0 * 2^K
6808     unsigned K = D.countr_zero();
6809     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6810     APInt D0 = D.lshr(K);
6811 
6812     // D is even if it has trailing zeros.
6813     HadEvenDivisor |= (K != 0);
6814     // D is a power-of-two if D0 is one.
6815     // If all divisors are power-of-two, we will prefer to avoid the fold.
6816     AllDivisorsArePowerOfTwo &= D0.isOne();
6817 
6818     // P = inv(D0, 2^W)
6819     // 2^W requires W + 1 bits, so we have to extend and then truncate.
6820     unsigned W = D.getBitWidth();
6821     APInt P = D0.multiplicativeInverse();
6822     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6823 
6824     // Q = floor((2^W - 1) u/ D)
6825     // R = ((2^W - 1) u% D)
6826     APInt Q, R;
6827     APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
6828 
6829     // If we are comparing with zero, then that comparison constant is okay,
6830     // else it may need to be one less than that.
6831     if (Cmp.ugt(R))
6832       Q -= 1;
6833 
6834     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6835            "We are expecting that K is always less than all-ones for ShSVT");
6836 
6837     // If the lane is tautological the result can be constant-folded.
6838     if (TautologicalLane) {
6839       // Set P and K amount to a bogus values so we can try to splat them.
6840       P = 0;
6841       K = -1;
6842       // And ensure that comparison constant is tautological,
6843       // it will always compare true/false.
6844       Q = -1;
6845     }
6846 
6847     PAmts.push_back(DAG.getConstant(P, DL, SVT));
6848     KAmts.push_back(
6849         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
6850                               /*implicitTrunc=*/true),
6851                         DL, ShSVT));
6852     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6853     return true;
6854   };
6855 
6856   SDValue N = REMNode.getOperand(0);
6857   SDValue D = REMNode.getOperand(1);
6858 
6859   // Collect the values from each element.
6860   if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6861     return SDValue();
6862 
6863   // If all lanes are tautological, the result can be constant-folded.
6864   if (AllLanesAreTautological)
6865     return SDValue();
6866 
6867   // If this is a urem by a powers-of-two, avoid the fold since it can be
6868   // best implemented as a bit test.
6869   if (AllDivisorsArePowerOfTwo)
6870     return SDValue();
6871 
6872   SDValue PVal, KVal, QVal;
6873   if (D.getOpcode() == ISD::BUILD_VECTOR) {
6874     if (HadTautologicalLanes) {
6875       // Try to turn PAmts into a splat, since we don't care about the values
6876       // that are currently '0'. If we can't, just keep '0'`s.
6877       turnVectorIntoSplatVector(PAmts, isNullConstant);
6878       // Try to turn KAmts into a splat, since we don't care about the values
6879       // that are currently '-1'. If we can't, change them to '0'`s.
6880       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6881                                 DAG.getConstant(0, DL, ShSVT));
6882     }
6883 
6884     PVal = DAG.getBuildVector(VT, DL, PAmts);
6885     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6886     QVal = DAG.getBuildVector(VT, DL, QAmts);
6887   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6888     assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6889            "Expected matchBinaryPredicate to return one element for "
6890            "SPLAT_VECTORs");
6891     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6892     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6893     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6894   } else {
6895     PVal = PAmts[0];
6896     KVal = KAmts[0];
6897     QVal = QAmts[0];
6898   }
6899 
6900   if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6901     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6902       return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6903     assert(CompTargetNode.getValueType() == N.getValueType() &&
6904            "Expecting that the types on LHS and RHS of comparisons match.");
6905     N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6906   }
6907 
6908   // (mul N, P)
6909   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6910   Created.push_back(Op0.getNode());
6911 
6912   // Rotate right only if any divisor was even. We avoid rotates for all-odd
6913   // divisors as a performance improvement, since rotating by 0 is a no-op.
6914   if (HadEvenDivisor) {
6915     // We need ROTR to do this.
6916     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6917       return SDValue();
6918     // UREM: (rotr (mul N, P), K)
6919     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6920     Created.push_back(Op0.getNode());
6921   }
6922 
6923   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6924   SDValue NewCC =
6925       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6926                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6927   if (!HadTautologicalInvertedLanes)
6928     return NewCC;
6929 
6930   // If any lanes previously compared always-false, the NewCC will give
6931   // always-true result for them, so we need to fixup those lanes.
6932   // Or the other way around for inequality predicate.
6933   assert(VT.isVector() && "Can/should only get here for vectors.");
6934   Created.push_back(NewCC.getNode());
6935 
6936   // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6937   // if C2 is not less than C1, the comparison is always false.
6938   // But we have produced the comparison that will give the
6939   // opposive tautological answer. So these lanes would need to be fixed up.
6940   SDValue TautologicalInvertedChannels =
6941       DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6942   Created.push_back(TautologicalInvertedChannels.getNode());
6943 
6944   // NOTE: we avoid letting illegal types through even if we're before legalize
6945   // ops – legalization has a hard time producing good code for this.
6946   if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6947     // If we have a vector select, let's replace the comparison results in the
6948     // affected lanes with the correct tautological result.
6949     SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6950                                               DL, SETCCVT, SETCCVT);
6951     return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6952                        Replacement, NewCC);
6953   }
6954 
6955   // Else, we can just invert the comparison result in the appropriate lanes.
6956   //
6957   // NOTE: see the note above VSELECT above.
6958   if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6959     return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6960                        TautologicalInvertedChannels);
6961 
6962   return SDValue(); // Don't know how to lower.
6963 }
6964 
6965 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6966 /// where the divisor is constant and the comparison target is zero,
6967 /// return a DAG expression that will generate the same comparison result
6968 /// using only multiplications, additions and shifts/rotations.
6969 /// Ref: "Hacker's Delight" 10-17.
6970 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6971                                         SDValue CompTargetNode,
6972                                         ISD::CondCode Cond,
6973                                         DAGCombinerInfo &DCI,
6974                                         const SDLoc &DL) const {
6975   SmallVector<SDNode *, 7> Built;
6976   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6977                                          DCI, DL, Built)) {
6978     assert(Built.size() <= 7 && "Max size prediction failed.");
6979     for (SDNode *N : Built)
6980       DCI.AddToWorklist(N);
6981     return Folded;
6982   }
6983 
6984   return SDValue();
6985 }
6986 
6987 SDValue
6988 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6989                                   SDValue CompTargetNode, ISD::CondCode Cond,
6990                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6991                                   SmallVectorImpl<SDNode *> &Created) const {
6992   // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6993   // Fold:
6994   //   (seteq/ne (srem N, D), 0)
6995   // To:
6996   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
6997   //
6998   // - D must be constant, with D = D0 * 2^K where D0 is odd
6999   // - P is the multiplicative inverse of D0 modulo 2^W
7000   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7001   // - Q = floor((2 * A) / (2^K))
7002   // where W is the width of the common type of N and D.
7003   //
7004   // When D is a power of two (and thus D0 is 1), the normal
7005   // formula for A and Q don't apply, because the derivation
7006   // depends on D not dividing 2^(W-1), and thus theorem ZRS
7007   // does not apply. This specifically fails when N = INT_MIN.
7008   //
7009   // Instead, for power-of-two D, we use:
7010   // - A = 2^(W-1)
7011   // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7012   // - Q = 2^(W-K) - 1
7013   // |-> Test that the top K bits are zero after rotation
7014   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7015          "Only applicable for (in)equality comparisons.");
7016 
7017   SelectionDAG &DAG = DCI.DAG;
7018 
7019   EVT VT = REMNode.getValueType();
7020   EVT SVT = VT.getScalarType();
7021   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7022   EVT ShSVT = ShVT.getScalarType();
7023 
7024   // If we are after ops legalization, and MUL is unavailable, we can not
7025   // proceed.
7026   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7027     return SDValue();
7028 
7029   // TODO: Could support comparing with non-zero too.
7030   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7031   if (!CompTarget || !CompTarget->isZero())
7032     return SDValue();
7033 
7034   bool HadIntMinDivisor = false;
7035   bool HadOneDivisor = false;
7036   bool AllDivisorsAreOnes = true;
7037   bool HadEvenDivisor = false;
7038   bool NeedToApplyOffset = false;
7039   bool AllDivisorsArePowerOfTwo = true;
7040   SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7041 
7042   auto BuildSREMPattern = [&](ConstantSDNode *C) {
7043     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7044     if (C->isZero())
7045       return false;
7046 
7047     // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7048 
7049     // WARNING: this fold is only valid for positive divisors!
7050     APInt D = C->getAPIntValue();
7051     if (D.isNegative())
7052       D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`
7053 
7054     HadIntMinDivisor |= D.isMinSignedValue();
7055 
7056     // If all divisors are ones, we will prefer to avoid the fold.
7057     HadOneDivisor |= D.isOne();
7058     AllDivisorsAreOnes &= D.isOne();
7059 
7060     // Decompose D into D0 * 2^K
7061     unsigned K = D.countr_zero();
7062     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7063     APInt D0 = D.lshr(K);
7064 
7065     if (!D.isMinSignedValue()) {
7066       // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7067       // we don't care about this lane in this fold, we'll special-handle it.
7068       HadEvenDivisor |= (K != 0);
7069     }
7070 
7071     // D is a power-of-two if D0 is one. This includes INT_MIN.
7072     // If all divisors are power-of-two, we will prefer to avoid the fold.
7073     AllDivisorsArePowerOfTwo &= D0.isOne();
7074 
7075     // P = inv(D0, 2^W)
7076     // 2^W requires W + 1 bits, so we have to extend and then truncate.
7077     unsigned W = D.getBitWidth();
7078     APInt P = D0.multiplicativeInverse();
7079     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7080 
7081     // A = floor((2^(W - 1) - 1) / D0) & -2^K
7082     APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7083     A.clearLowBits(K);
7084 
7085     if (!D.isMinSignedValue()) {
7086       // If divisor INT_MIN, then we don't care about this lane in this fold,
7087       // we'll special-handle it.
7088       NeedToApplyOffset |= A != 0;
7089     }
7090 
7091     // Q = floor((2 * A) / (2^K))
7092     APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7093 
7094     assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7095            "We are expecting that A is always less than all-ones for SVT");
7096     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7097            "We are expecting that K is always less than all-ones for ShSVT");
7098 
7099     // If D was a power of two, apply the alternate constant derivation.
7100     if (D0.isOne()) {
7101       // A = 2^(W-1)
7102       A = APInt::getSignedMinValue(W);
7103       // - Q = 2^(W-K) - 1
7104       Q = APInt::getAllOnes(W - K).zext(W);
7105     }
7106 
7107     // If the divisor is 1 the result can be constant-folded. Likewise, we
7108     // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7109     if (D.isOne()) {
7110       // Set P, A and K to a bogus values so we can try to splat them.
7111       P = 0;
7112       A = -1;
7113       K = -1;
7114 
7115       // x ?% 1 == 0  <-->  true  <-->  x u<= -1
7116       Q = -1;
7117     }
7118 
7119     PAmts.push_back(DAG.getConstant(P, DL, SVT));
7120     AAmts.push_back(DAG.getConstant(A, DL, SVT));
7121     KAmts.push_back(
7122         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7123                               /*implicitTrunc=*/true),
7124                         DL, ShSVT));
7125     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7126     return true;
7127   };
7128 
7129   SDValue N = REMNode.getOperand(0);
7130   SDValue D = REMNode.getOperand(1);
7131 
7132   // Collect the values from each element.
7133   if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7134     return SDValue();
7135 
7136   // If this is a srem by a one, avoid the fold since it can be constant-folded.
7137   if (AllDivisorsAreOnes)
7138     return SDValue();
7139 
7140   // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7141   // since it can be best implemented as a bit test.
7142   if (AllDivisorsArePowerOfTwo)
7143     return SDValue();
7144 
7145   SDValue PVal, AVal, KVal, QVal;
7146   if (D.getOpcode() == ISD::BUILD_VECTOR) {
7147     if (HadOneDivisor) {
7148       // Try to turn PAmts into a splat, since we don't care about the values
7149       // that are currently '0'. If we can't, just keep '0'`s.
7150       turnVectorIntoSplatVector(PAmts, isNullConstant);
7151       // Try to turn AAmts into a splat, since we don't care about the
7152       // values that are currently '-1'. If we can't, change them to '0'`s.
7153       turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7154                                 DAG.getConstant(0, DL, SVT));
7155       // Try to turn KAmts into a splat, since we don't care about the values
7156       // that are currently '-1'. If we can't, change them to '0'`s.
7157       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7158                                 DAG.getConstant(0, DL, ShSVT));
7159     }
7160 
7161     PVal = DAG.getBuildVector(VT, DL, PAmts);
7162     AVal = DAG.getBuildVector(VT, DL, AAmts);
7163     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7164     QVal = DAG.getBuildVector(VT, DL, QAmts);
7165   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7166     assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7167            QAmts.size() == 1 &&
7168            "Expected matchUnaryPredicate to return one element for scalable "
7169            "vectors");
7170     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7171     AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7172     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7173     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7174   } else {
7175     assert(isa<ConstantSDNode>(D) && "Expected a constant");
7176     PVal = PAmts[0];
7177     AVal = AAmts[0];
7178     KVal = KAmts[0];
7179     QVal = QAmts[0];
7180   }
7181 
7182   // (mul N, P)
7183   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7184   Created.push_back(Op0.getNode());
7185 
7186   if (NeedToApplyOffset) {
7187     // We need ADD to do this.
7188     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7189       return SDValue();
7190 
7191     // (add (mul N, P), A)
7192     Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7193     Created.push_back(Op0.getNode());
7194   }
7195 
7196   // Rotate right only if any divisor was even. We avoid rotates for all-odd
7197   // divisors as a performance improvement, since rotating by 0 is a no-op.
7198   if (HadEvenDivisor) {
7199     // We need ROTR to do this.
7200     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7201       return SDValue();
7202     // SREM: (rotr (add (mul N, P), A), K)
7203     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7204     Created.push_back(Op0.getNode());
7205   }
7206 
7207   // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7208   SDValue Fold =
7209       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7210                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7211 
7212   // If we didn't have lanes with INT_MIN divisor, then we're done.
7213   if (!HadIntMinDivisor)
7214     return Fold;
7215 
7216   // That fold is only valid for positive divisors. Which effectively means,
7217   // it is invalid for INT_MIN divisors. So if we have such a lane,
7218   // we must fix-up results for said lanes.
7219   assert(VT.isVector() && "Can/should only get here for vectors.");
7220 
7221   // NOTE: we avoid letting illegal types through even if we're before legalize
7222   // ops – legalization has a hard time producing good code for the code that
7223   // follows.
7224   if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7225       !isOperationLegalOrCustom(ISD::AND, VT) ||
7226       !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7227       !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7228     return SDValue();
7229 
7230   Created.push_back(Fold.getNode());
7231 
7232   SDValue IntMin = DAG.getConstant(
7233       APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7234   SDValue IntMax = DAG.getConstant(
7235       APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7236   SDValue Zero =
7237       DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7238 
7239   // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7240   SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7241   Created.push_back(DivisorIsIntMin.getNode());
7242 
7243   // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
7244   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7245   Created.push_back(Masked.getNode());
7246   SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7247   Created.push_back(MaskedIsZero.getNode());
7248 
7249   // To produce final result we need to blend 2 vectors: 'SetCC' and
7250   // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7251   // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7252   // constant-folded, select can get lowered to a shuffle with constant mask.
7253   SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7254                                 MaskedIsZero, Fold);
7255 
7256   return Blended;
7257 }
7258 
7259 bool TargetLowering::
7260 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7261   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7262     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7263                                 "be a constant integer");
7264     return true;
7265   }
7266 
7267   return false;
7268 }
7269 
7270 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7271                                          const DenormalMode &Mode) const {
7272   SDLoc DL(Op);
7273   EVT VT = Op.getValueType();
7274   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7275   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7276 
7277   // This is specifically a check for the handling of denormal inputs, not the
7278   // result.
7279   if (Mode.Input == DenormalMode::PreserveSign ||
7280       Mode.Input == DenormalMode::PositiveZero) {
7281     // Test = X == 0.0
7282     return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7283   }
7284 
7285   // Testing it with denormal inputs to avoid wrong estimate.
7286   //
7287   // Test = fabs(X) < SmallestNormal
7288   const fltSemantics &FltSem = VT.getFltSemantics();
7289   APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7290   SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7291   SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7292   return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7293 }
7294 
// Return a negated form of \p Op if one can be built without introducing
// extra cost, reporting how the negated form compares to the original via
// \p Cost. Returns an empty SDValue when no acceptable negation exists.
// NOTE: the enum ordering Cheaper < Neutral < Expensive is relied upon by
// the <=/min comparisons below.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Helper to drop a speculatively-built negated operand that ended up unused
  // (only when it has no other users in the DAG).
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    // Legal if the vector/constant ops themselves are legal, or if every
    // negated element would be a legal FP immediate.
    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Rebuild the vector with each (non-undef) element's sign flipped.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      // The unused speculative NegY can be reclaimed (unless it IS N).
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      // Report the better of the multiplicand and addend costs.
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // fold (fneg (fp_extend X)) -> (fp_extend (fneg X))
    // fold (fneg (fsin X))      -> (fsin (fneg X))
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // fold (fneg (fp_round X)) -> (fp_round (fneg X)); the second operand
    // (the truncation flag) is carried over unchanged.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7598 
7599 //===----------------------------------------------------------------------===//
7600 // Legalization Utilities
7601 //===----------------------------------------------------------------------===//
7602 
// Expand a MUL / UMUL_LOHI / SMUL_LOHI of type \p VT into operations on the
// half-width type \p HiLoVT. On success, appends the HiLoVT pieces of the
// product to \p Result, least-significant piece first (two pieces for
// ISD::MUL, four for the *_LOHI opcodes), and returns true. The caller may
// pre-supply the split operand halves LL/LH/RL/RH (all four or none).
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which half-width multiply flavors may we use? "Always" forces them all
  // available regardless of target legality.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a HiLoVT x HiLoVT -> {Lo, Hi} multiply, preferring the combined
  // [SU]MUL_LOHI node over a separate MUL + MULH[SU] pair. Returns false if
  // neither form is available for the requested signedness.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // If the caller didn't provide split operands, produce the low halves by
  // truncation.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // The *_LOHI high half is all zero in this case.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  // Produce the high halves by shift + truncate if not supplied.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Low partial product: LL * RL.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // For a plain MUL only the low VT bits are needed, so the cross terms
    // can be folded into the high piece without carry tracking.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  // First cross term: LL * RH.
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  // Second cross term: LH * RL.
  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second cross term can overflow, so propagate a carry either
  // via the glued ADDC/ADDE pair or via UADDO_CARRY.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  // Second result piece, then shift down to continue with the upper bits.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // High partial product: LH * RH (signed only for SMUL_LOHI).
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Fold the saved carry into the high partial product.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Correct the result for signed operands: when an operand's high half is
    // negative, subtract the other operand's low half (zero-extended) from
    // the accumulated upper bits.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // Emit the last two result pieces.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
7777 
7778 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7779                                SelectionDAG &DAG, MulExpansionKind Kind,
7780                                SDValue LL, SDValue LH, SDValue RL,
7781                                SDValue RH) const {
7782   SmallVector<SDValue, 2> Result;
7783   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7784                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7785                            DAG, Kind, LL, LH, RL, RH);
7786   if (Ok) {
7787     assert(Result.size() == 2);
7788     Lo = Result[0];
7789     Hi = Result[1];
7790   }
7791   return Ok;
7792 }
7793 
7794 // Optimize unsigned division or remainder by constants for types twice as large
7795 // as a legal VT.
7796 //
7797 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7798 // can be computed
7799 // as:
7800 //   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7801 //   Remainder = Sum % Constant
7802 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7803 //
7804 // For division, we can compute the remainder using the algorithm described
7805 // above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
7807 // (1 << (BitWidth / 2)) to get the quotient.
7808 
7809 // If Constant is even, we can shift right the dividend and the divisor by the
7810 // number of trailing zeros in Constant before applying the remainder algorithm.
7811 // If we're after the quotient, we can subtract this value from the shifted
7812 // dividend and multiply by the multiplicative inverse of the shifted divisor.
7813 // If we want the remainder, we shift the value left by the number of trailing
7814 // zeros and add the bits that were shifted out of the dividend.
7815 bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7816                                             SmallVectorImpl<SDValue> &Result,
7817                                             EVT HiLoVT, SelectionDAG &DAG,
7818                                             SDValue LL, SDValue LH) const {
7819   unsigned Opcode = N->getOpcode();
7820   EVT VT = N->getValueType(0);
7821 
7822   // TODO: Support signed division/remainder.
7823   if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7824     return false;
7825   assert(
7826       (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7827       "Unexpected opcode");
7828 
7829   auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7830   if (!CN)
7831     return false;
7832 
7833   APInt Divisor = CN->getAPIntValue();
7834   unsigned BitWidth = Divisor.getBitWidth();
7835   unsigned HBitWidth = BitWidth / 2;
7836   assert(VT.getScalarSizeInBits() == BitWidth &&
7837          HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7838 
  // Divisor needs to be less than (1 << HBitWidth).
7840   APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7841   if (Divisor.uge(HalfMaxPlus1))
7842     return false;
7843 
7844   // We depend on the UREM by constant optimization in DAGCombiner that requires
7845   // high multiply.
7846   if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7847       !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
7848     return false;
7849 
7850   // Don't expand if optimizing for size.
7851   if (DAG.shouldOptForSize())
7852     return false;
7853 
7854   // Early out for 0 or 1 divisors.
7855   if (Divisor.ule(1))
7856     return false;
7857 
7858   // If the divisor is even, shift it until it becomes odd.
7859   unsigned TrailingZeros = 0;
7860   if (!Divisor[0]) {
7861     TrailingZeros = Divisor.countr_zero();
7862     Divisor.lshrInPlace(TrailingZeros);
7863   }
7864 
7865   SDLoc dl(N);
7866   SDValue Sum;
7867   SDValue PartialRem;
7868 
7869   // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7870   // then add in the carry.
7871   // TODO: If we can't split it in half, we might be able to split into 3 or
7872   // more pieces using a smaller bit width.
7873   if (HalfMaxPlus1.urem(Divisor).isOne()) {
7874     assert(!LL == !LH && "Expected both input halves or no input halves!");
7875     if (!LL)
7876       std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7877 
7878     // Shift the input by the number of TrailingZeros in the divisor. The
7879     // shifted out bits will be added to the remainder later.
7880     if (TrailingZeros) {
7881       // Save the shifted off bits if we need the remainder.
7882       if (Opcode != ISD::UDIV) {
7883         APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7884         PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7885                                  DAG.getConstant(Mask, dl, HiLoVT));
7886       }
7887 
7888       LL = DAG.getNode(
7889           ISD::OR, dl, HiLoVT,
7890           DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7891                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7892           DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7893                       DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7894                                                  HiLoVT, dl)));
7895       LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7896                        DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7897     }
7898 
7899     // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7900     EVT SetCCType =
7901         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7902     if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
7903       SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7904       Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7905       Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7906                         DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7907     } else {
7908       Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7909       SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7910       // If the boolean for the target is 0 or 1, we can add the setcc result
7911       // directly.
7912       if (getBooleanContents(HiLoVT) ==
7913           TargetLoweringBase::ZeroOrOneBooleanContent)
7914         Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7915       else
7916         Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7917                               DAG.getConstant(0, dl, HiLoVT));
7918       Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7919     }
7920   }
7921 
7922   // If we didn't find a sum, we can't do the expansion.
7923   if (!Sum)
7924     return false;
7925 
7926   // Perform a HiLoVT urem on the Sum using truncated divisor.
7927   SDValue RemL =
7928       DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7929                   DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7930   SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7931 
7932   if (Opcode != ISD::UREM) {
7933     // Subtract the remainder from the shifted dividend.
7934     SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7935     SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7936 
7937     Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7938 
7939     // Multiply by the multiplicative inverse of the divisor modulo
7940     // (1 << BitWidth).
7941     APInt MulFactor = Divisor.multiplicativeInverse();
7942 
7943     SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7944                                    DAG.getConstant(MulFactor, dl, VT));
7945 
7946     // Split the quotient into low and high parts.
7947     SDValue QuotL, QuotH;
7948     std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7949     Result.push_back(QuotL);
7950     Result.push_back(QuotH);
7951   }
7952 
7953   if (Opcode != ISD::UDIV) {
7954     // If we shifted the input, shift the remainder left and add the bits we
7955     // shifted off the input.
7956     if (TrailingZeros) {
7957       APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7958       RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7959                          DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7960       RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7961     }
7962     Result.push_back(RemL);
7963     Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7964   }
7965 
7966   return true;
7967 }
7968 
7969 // Check that (every element of) Z is undef or not an exact multiple of BW.
7970 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7971   return ISD::matchUnaryPredicate(
7972       Z,
7973       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7974       true);
7975 }
7976 
/// Expand a VP_FSHL/VP_FSHR (vector-predicated funnel shift) node into
/// vector-predicated shift/logic nodes. The mask and explicit-vector-length
/// operands of \p Node are threaded through every VP node that is created.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);    // Shift amount.
  SDValue Mask = Node->getOperand(3); // Per-lane predicate mask.
  SDValue VL = Node->getOperand(4);   // Explicit vector length.

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // The shift amount is known to be non-zero mod BW, so the complementary
    // shift amount (BW - C) is strictly less than BW and both shifts are
    // in-range:
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // The shift amount may be zero mod BW; split the complementary shift into
    // a fixed shift by 1 followed by a shift by (BW - 1 - C) so that no
    // single shift amount can reach BW (which would be out of range):
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      // Non-power-of-2 bit width: fall back to an explicit urem/sub.
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  // Combine the two halves with a predicated OR.
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
8033 
8034 SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8035                                           SelectionDAG &DAG) const {
8036   if (Node->isVPOpcode())
8037     return expandVPFunnelShift(Node, DAG);
8038 
8039   EVT VT = Node->getValueType(0);
8040 
8041   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8042                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
8043                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
8044                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8045     return SDValue();
8046 
8047   SDValue X = Node->getOperand(0);
8048   SDValue Y = Node->getOperand(1);
8049   SDValue Z = Node->getOperand(2);
8050 
8051   unsigned BW = VT.getScalarSizeInBits();
8052   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8053   SDLoc DL(SDValue(Node, 0));
8054 
8055   EVT ShVT = Z.getValueType();
8056 
8057   // If a funnel shift in the other direction is more supported, use it.
8058   unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8059   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8060       isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8061     if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8062       // fshl X, Y, Z -> fshr X, Y, -Z
8063       // fshr X, Y, Z -> fshl X, Y, -Z
8064       SDValue Zero = DAG.getConstant(0, DL, ShVT);
8065       Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8066     } else {
8067       // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8068       // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8069       SDValue One = DAG.getConstant(1, DL, ShVT);
8070       if (IsFSHL) {
8071         Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8072         X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8073       } else {
8074         X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8075         Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8076       }
8077       Z = DAG.getNOT(DL, Z, ShVT);
8078     }
8079     return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8080   }
8081 
8082   SDValue ShX, ShY;
8083   SDValue ShAmt, InvShAmt;
8084   if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8085     // fshl: X << C | Y >> (BW - C)
8086     // fshr: X << (BW - C) | Y >> C
8087     // where C = Z % BW is not zero
8088     SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8089     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8090     InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8091     ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8092     ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8093   } else {
8094     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8095     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8096     SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8097     if (isPowerOf2_32(BW)) {
8098       // Z % BW -> Z & (BW - 1)
8099       ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8100       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8101       InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8102     } else {
8103       SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8104       ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8105       InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8106     }
8107 
8108     SDValue One = DAG.getConstant(1, DL, ShVT);
8109     if (IsFSHL) {
8110       ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8111       SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8112       ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8113     } else {
8114       SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8115       ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8116       ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8117     }
8118   }
8119   return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8120 }
8121 
// TODO: Merge with expandFunnelShift.
/// Expand ROTL/ROTR into shift/logic nodes, or into the reverse-direction
/// rotate when that one is better supported. \p AllowVectorOps permits the
/// expansion for vectors even when the building-block vector operations are
/// not all legal or custom.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1); // Rotate amount.
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it.
  // rotl x, c -> rotr x, -c ; rotr x, c -> rotl x, -c. This requires a
  // power-of-2 bit width so negation is equivalent to (BW - c) mod BW.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  // Unless the caller explicitly allows it, only expand vectors when all of
  // the building-block operations are available.
  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  // ShOpc shifts in the rotate direction; HsOpc shifts the "wrapped" bits in
  // from the other side.
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // Masking with (w - 1) keeps both shift amounts in range:
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // Non-power-of-2 width: take the amount mod w, and split the
    // complementary shift into a fixed shift by 1 plus a shift by
    // (w - 1 - (c % w)) so no single shift amount can reach w:
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  // Combine the two halves of the rotate.
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
8177 
/// Expand SHL_PARTS/SRL_PARTS/SRA_PARTS (a double-wide shift expressed on a
/// lo/hi pair of parts) using funnel shifts plus selects that handle shift
/// amounts greater than or equal to the part width. Results are returned in
/// \p Lo and \p Hi.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 is the value that fills the "vacated" part for large shift amounts:
  // the sign-extension of the high part for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2: the part that mixes bits from both inputs (via a funnel shift).
  // Tmp3: the part produced from a single input with the masked amount.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts. Because VTBits is a power of two, ANDing
  // with VTBits tests exactly the bit that distinguishes amounts >= VTBits.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  // Select the final halves: for amounts >= VTBits the single-input shift
  // result moves to the far half and the fill value occupies the near half.
  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8229 
/// Expand FP_TO_SINT by decomposing the float's bit pattern into sign,
/// exponent and mantissa and assembling the integer result with integer
/// arithmetic. Returns true and sets \p Result on success.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8 exponent bits at [23,30],
  // bias 127, 23 mantissa bits, sign in the top bit.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as an integer to pick the fields apart.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Smear the sign bit across the whole word: 0 for positive, -1 for
  // negative, then sign-extend to the destination width.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // R = mantissa with the implicit leading 1 (bit 23) restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Shift the mantissa into its final position: left when the exponent
  // exceeds the mantissa width, right otherwise.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Conditionally negate: (R ^ Sign) - Sign is R when Sign == 0 and -R when
  // Sign == -1.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, which truncates to 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8300 
/// Expand FP_TO_UINT in terms of FP_TO_SINT: values below the destination
/// sign-mask convert directly; larger values are offset by the sign-mask
/// before conversion and the result is corrected afterwards. Returns true and
/// sets \p Result (and \p Chain for strict nodes) on success.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst is the sign-mask boundary value as a float; Sel tests Src < Cst.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Signaling compare so sNaN inputs raise the mandated exception.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    // XOR with the sign-mask re-adds the offset for the out-of-range case.
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8402 
/// Expand UINT_TO_FP (currently only i64 -> f64) by splitting the integer
/// into two 32-bit halves, biasing each half into the exponent range of a
/// double and recombining with FP arithmetic. Returns true and sets
/// \p Result on success.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  // If the input is known to be non-negative and SINT_TO_FP is legal then use
  // it.
  if (Node->getFlags().hasNonNeg() &&
      isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
    Result =
        DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
    return true;
  }

  // The bit-twiddling expansion below only handles i64 -> f64.
  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit
  // operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt.  This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will
  // produce -0.0. This will be added to +0.0 and produce -0.0 which is
  // incorrect.
  // TwoP52/TwoP84 are the bit patterns of 2^52 and 2^84 as doubles; ORing a
  // 32-bit half below them yields the double value (2^k + half) exactly.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);

  // Split the input into low and high 32-bit halves.
  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  // Embed each half into a double via bit manipulation, subtract the
  // combined bias, then add the two contributions.
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
8461 
8462 SDValue
8463 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8464                                                SelectionDAG &DAG) const {
8465   unsigned Opcode = Node->getOpcode();
8466   assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8467           Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8468          "Wrong opcode");
8469 
8470   if (Node->getFlags().hasNoNaNs()) {
8471     ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8472     EVT VT = Node->getValueType(0);
8473     if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8474          !isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
8475         VT.isVector())
8476       return SDValue();
8477     SDValue Op1 = Node->getOperand(0);
8478     SDValue Op2 = Node->getOperand(1);
8479     SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8480     // Copy FMF flags, but always set the no-signed-zeros flag
8481     // as this is implied by the FMINNUM/FMAXNUM semantics.
8482     SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
8483     return SelCC;
8484   }
8485 
8486   return SDValue();
8487 }
8488 
/// Expand FMINNUM/FMAXNUM, preferring (in order): the IEEE variants with
/// quieted operands, FMINIMUM/FMAXIMUM when NaNs/zero-sign issues are ruled
/// out, and finally a compare-and-select. Returns an empty SDValue if no
/// expansion applies.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(0);
  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT))
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
  }

  // Last resort: a compare+select (only valid with the no-NaNs flag).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8544 
/// Expand FMINIMUM/FMAXIMUM (IEEE-754 2019 semantics: NaNs propagate and
/// -0.0 orders below +0.0) in terms of available min/max or compare+select,
/// then patch up NaN propagation and signed-zero ordering as needed.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
  } else {
    // No native min/max: fall back to setcc+select; unroll vectors if the
    // target has no vector select.
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
                                        APFloat::getNaN(VT.getFltSemantics()));
    // SETUO is true iff either operand is NaN; in that case return NaN.
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
    // Only if the result compares equal to 0.0 do we need to distinguish the
    // zero signs: pick the operand classified as the "preferred" zero
    // (+0.0 for max, -0.0 for min), checking RHS after LHS.
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
  }

  return MinMax;
}
8611 
/// Expand ISD::FMINIMUMNUM / ISD::FMAXIMUMNUM (IEEE 754-2019
/// minimumNumber/maximumNumber): a NaN operand loses against a numeric
/// operand (the number is returned, NaNs are quieted), and signed zeros are
/// treated as ordered (-0.0 < +0.0) unless flags allow otherwise.
///
/// Strategy, in decreasing order of preference:
///   1. FMINNUM_IEEE/FMAXNUM_IEEE with canonicalized (quieted) inputs.
///   2. FMINIMUM/FMAXIMUM when NaNs are provably absent.
///   3. FMINNUM/FMAXNUM when sNaNs are absent and signed zeros don't matter.
///   4. Generic compare+select expansion with explicit NaN and ±0.0 fixups.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = Node->getFlags();

  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(LHS)) {
        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(RHS)) {
        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
      }
    }

    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there are no NaNs, since the two
  // operations behave identically in all other cases, +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2019Op, VT))
      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
  }

  // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0. So it is only usable if sNaNs are provably
  // absent and the caller doesn't care about signed-zero ordering.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
       DAG.isKnownNeverZeroFloat(RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(IEEE2008Op, VT))
      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
  }

  // The generic expansion below requires (V)SELECT; scalarize otherwise.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // If only one operand is NaN, override it with another operand.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
  }

  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
  // If MinMax is NaN, let's quiet it.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
      !DAG.isKnownNeverNaN(RHS)) {
    MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
  }

  // Fixup signed zero behavior.
  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
    return MinMax;
  }
  // When the compare result is zero, both operands may be zeros of opposite
  // sign; pick whichever operand has the class (fcPosZero for max, fcNegZero
  // for min) that must win.
  SDValue TestZero =
      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
  SDValue LCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
      MinMax, Flags);
  SDValue RCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
      Flags);
  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
}
8699 
/// Returns a true value if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
///
/// \p Semantics selects the float format, and \p MF supplies the denormal
/// mode: a compare with 0 is only equivalent to a zero-class test when the
/// denormal handling matches the classes being tested.
static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
                                           const fltSemantics &Semantics,
                                           const MachineFunction &MF) {
  FPClassTest OrderedMask = Test & ~fcNan;
  FPClassTest NanTest = Test & fcNan;
  bool IsOrdered = NanTest == fcNone;  // No NaN bits requested.
  bool IsUnordered = NanTest == fcNan; // Both qnan and snan requested.

  // Skip cases that are testing for only a qnan or snan.
  if (!IsOrdered && !IsUnordered)
    return std::nullopt;

  // fcZero alone maps to a compare with 0 only if denormals are not flushed
  // on input (otherwise denormals would also compare equal to 0).
  if (OrderedMask == fcZero &&
      MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
    return IsOrdered;
  // fcZero|fcSubnormal maps to a compare with 0 when inputs ARE treated as
  // zero, since then denormals are expected to compare equal to 0.
  if (OrderedMask == (fcZero | fcSubnormal) &&
      MF.getDenormalMode(Semantics).inputsAreZero())
    return IsOrdered;
  return std::nullopt;
}
8723 
/// Expand an ISD::IS_FPCLASS test of \p Op against the class mask
/// \p OrigTestMask, producing a boolean of type \p ResultVT.
///
/// Two strategies are tried in order:
///   1. Floating-point compares (only when FP exceptions may be ignored):
///      several common masks reduce to a single setcc against 0/inf, or an
///      fabs + setcc pair.
///   2. The general fallback: bitcast to an integer of the same width and
///      test sign/exponent/mantissa bit patterns class by class, OR-ing the
///      partial results together.
/// Either way the mask may first be replaced by its inverse when the inverse
/// is simpler to test, with the final result flipped back at the end.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x86 extended precision has an explicit integer bit, handled specially
  // throughout the integer expansion below.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(FPTestMask, true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                             : UnorderedCmpOpcode,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
        (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
         (OperandVT.isVector() &&
          isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
                                          : UnorderedCmpOpcode,
                                OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
          OperandVT);
      return DAG.getSetCC(DL, ResultVT, Op, Inf,
                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                  OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
                            IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(IsFiniteOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(IsNormalOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // Accumulated result; each class check below ORs its partial result in.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  // Lazily-built test of the f80 explicit integer bit, shared by the NaN and
  // normal checks.
  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No class bits survived the multi-class folding above: the answer is a
  // constant (true iff we were computing the inverted test).
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
9071 
9072 // Only expand vector types if we have the appropriate vector bit operations.
9073 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9074   assert(VT.isVector() && "Expected vector type");
9075   unsigned Len = VT.getScalarSizeInBits();
9076   return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9077          TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9078          TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9079          (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9080          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9081 }
9082 
/// Expand ISD::CTPOP using the parallel bit-counting ("popcount") algorithm:
/// pairwise bit sums, then nibble sums, then either a multiply by 0x0101...
/// or a shift-add ladder to accumulate the per-byte counts into the top byte.
/// Returns SDValue() if the element width is unsupported (must be a multiple
/// of 8 and at most 128 bits) or the required vector ops are unavailable.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // Each byte now holds its own popcount; a single byte is already the answer.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                     DAG.getNode(ISD::ADD, dl, VT, Op,
                                 DAG.getNode(ISD::SRL, dl, VT, Op,
                                             DAG.getConstant(8, dl, ShVT))),
                     DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    // No usable multiply: sum the byte counts with a log2(Len/8)-step
    // shift-add ladder instead.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  // The total count has accumulated in the top byte; shift it down.
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
9159 
9160 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9161   SDLoc dl(Node);
9162   EVT VT = Node->getValueType(0);
9163   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9164   SDValue Op = Node->getOperand(0);
9165   SDValue Mask = Node->getOperand(1);
9166   SDValue VL = Node->getOperand(2);
9167   unsigned Len = VT.getScalarSizeInBits();
9168   assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9169 
9170   // TODO: Add support for irregular type lengths.
9171   if (!(Len <= 128 && Len % 8 == 0))
9172     return SDValue();
9173 
9174   // This is same algorithm of expandCTPOP from
9175   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9176   SDValue Mask55 =
9177       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9178   SDValue Mask33 =
9179       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9180   SDValue Mask0F =
9181       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9182 
9183   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9184 
9185   // v = v - ((v >> 1) & 0x55555555...)
9186   Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9187                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9188                                  DAG.getConstant(1, dl, ShVT), Mask, VL),
9189                      Mask55, Mask, VL);
9190   Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9191 
9192   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9193   Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9194   Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9195                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9196                                  DAG.getConstant(2, dl, ShVT), Mask, VL),
9197                      Mask33, Mask, VL);
9198   Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9199 
9200   // v = (v + (v >> 4)) & 0x0F0F0F0F...
9201   Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9202                      Mask, VL),
9203   Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9204   Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9205 
9206   if (Len <= 8)
9207     return Op;
9208 
9209   // v = (v * 0x01010101...) >> (Len - 8)
9210   SDValue V;
9211   if (isOperationLegalOrCustomOrPromote(
9212           ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9213     SDValue Mask01 =
9214         DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9215     V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9216   } else {
9217     V = Op;
9218     for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9219       SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9220       V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9221                       DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9222                       Mask, VL);
9223     }
9224   }
9225   return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9226                      Mask, VL);
9227 }
9228 
/// Expand ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF: prefer the sibling opcode when
/// the target supports it (adding a zero-input select for the defined-at-zero
/// form), otherwise smear the leading one bit down and popcount the
/// complement. Returns SDValue() when an unsupported vector type cannot be
/// expanded this way.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // ctlz(0) is defined to be the bit width for the non-ZERO_UNDEF form.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
9278 
9279 SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9280   SDLoc dl(Node);
9281   EVT VT = Node->getValueType(0);
9282   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9283   SDValue Op = Node->getOperand(0);
9284   SDValue Mask = Node->getOperand(1);
9285   SDValue VL = Node->getOperand(2);
9286   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9287 
9288   // do this:
9289   // x = x | (x >> 1);
9290   // x = x | (x >> 2);
9291   // ...
9292   // x = x | (x >>16);
9293   // x = x | (x >>32); // for 64-bit input
9294   // return popcount(~x);
9295   for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9296     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9297     Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9298                      DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9299                      VL);
9300   }
9301   Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9302                    Mask, VL);
9303   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9304 }
9305 
/// Lower CTTZ/CTTZ_ZERO_UNDEF via a De Bruijn multiplication and an 8-bit
/// table lookup in the constant pool. Only 32- and 64-bit widths are
/// supported; other widths return SDValue().
///
/// (Op & -Op) isolates the lowest set bit; multiplying by the De Bruijn
/// constant and keeping the top log2(BitWidth) bits produces a unique index
/// per bit position, which the generated table maps back to the
/// trailing-zero count. For plain CTTZ (not ZERO_UNDEF) a zero input is
/// fixed up with a select to return the full bit width.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // Neg = -Op, so (Op & Neg) isolates the lowest set bit of Op.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  // Lookup index = ((Op & -Op) * DeBruijn) >> (BitWidth - log2(BitWidth)).
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse table: index produced by bit i maps back to i.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  // Zero-extending i8 load of Table[Lookup].
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // Plain CTTZ must return BitWidth for a zero input.
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9349 
/// Expand CTTZ/CTTZ_ZERO_UNDEF for \p Node.
///
/// Strategy, in order of preference:
///   1. CTTZ_ZERO_UNDEF -> CTTZ when the non-zero-undef form is legal/custom.
///   2. CTTZ -> select(x == 0, bitwidth, CTTZ_ZERO_UNDEF(x)).
///   3. Scalar De Bruijn table lookup when neither CTPOP nor CTLZ is legal.
///   4. popcount(~x & (x - 1)), or bitwidth - ctlz(~x & (x - 1)) when only
///      CTLZ is legal (Hacker's Delight).
/// Returns SDValue() when a vector type lacks the operations needed for the
/// fallback expansion.
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // cttz(0) is defined as the full element width.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
      !isOperationLegal(ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) has one set bit per trailing zero of x.
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
9405 
9406 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9407   SDValue Op = Node->getOperand(0);
9408   SDValue Mask = Node->getOperand(1);
9409   SDValue VL = Node->getOperand(2);
9410   SDLoc dl(Node);
9411   EVT VT = Node->getValueType(0);
9412 
9413   // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9414   SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9415                             DAG.getAllOnesConstant(dl, VT), Mask, VL);
9416   SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9417                                  DAG.getConstant(1, dl, VT), Mask, VL);
9418   SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9419   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9420 }
9421 
/// Expand VP_CTTZ_ELTS: produce the index of the first active lane of the
/// source predicate (or EVL when no lane within EVL is active) by selecting
/// each active lane's step index (inactive lanes get EVL) and taking the
/// unsigned minimum across the vector.
SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
                                             SelectionDAG &DAG) const {
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
  SDLoc DL(N);
  SDValue Source = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(0);
  // Vector of result-typed elements used for the step vector and splat.
  EVT ResVecVT =
      EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());

  // Convert to boolean vector.
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
    SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                             SrcVT.getVectorElementCount());
    // Lane is "active" iff its source element is non-zero.
    Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
                         DAG.getCondCode(ISD::SETNE), Mask, EVL);
  }

  // Splatting EVL ensures inactive lanes never win the umin reduction, and
  // also yields EVL when no lane is active.
  SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
  SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
  SDValue Select =
      DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
  return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
}
9454 
/// Expand a find-last-active operation: return the index of the
/// highest-numbered active lane of the mask operand (0 when no lane is
/// active), lowered as umax-reduce(select(mask, stepvector, 0)).
SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue Mask = N->getOperand(0);
  EVT MaskVT = Mask.getValueType();
  EVT BoolVT = MaskVT.getScalarType();

  // Find a suitable type for a stepvector.
  ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
  if (MaskVT.isScalableVector())
    VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Element width only needs to be wide enough to hold the largest lane
  // index; getBitWidthForCttzElements computes that bound.
  unsigned EltWidth = TLI.getBitWidthForCttzElements(
      BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
      /*ZeroIsPoison=*/true, &VScaleRange);
  EVT StepVT = MVT::getIntegerVT(EltWidth);
  EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);

  // If promotion is required to make the type legal, do it here; promotion
  // of integers within LegalizeVectorOps is looking for types of the same
  // size but with a smaller number of larger elements, not the usual larger
  // size with the same number of larger elements.
  if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
      TargetLowering::TypePromoteInteger) {
    StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
    StepVT = StepVecVT.getVectorElementType();
  }

  // Zero out lanes with inactive elements, then find the highest remaining
  // value from the stepvector.
  SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
  SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
  SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
  SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
  return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
}
9491 
9492 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9493                                   bool IsNegative) const {
9494   SDLoc dl(N);
9495   EVT VT = N->getValueType(0);
9496   SDValue Op = N->getOperand(0);
9497 
9498   // abs(x) -> smax(x,sub(0,x))
9499   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9500       isOperationLegal(ISD::SMAX, VT)) {
9501     SDValue Zero = DAG.getConstant(0, dl, VT);
9502     Op = DAG.getFreeze(Op);
9503     return DAG.getNode(ISD::SMAX, dl, VT, Op,
9504                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9505   }
9506 
9507   // abs(x) -> umin(x,sub(0,x))
9508   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9509       isOperationLegal(ISD::UMIN, VT)) {
9510     SDValue Zero = DAG.getConstant(0, dl, VT);
9511     Op = DAG.getFreeze(Op);
9512     return DAG.getNode(ISD::UMIN, dl, VT, Op,
9513                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9514   }
9515 
9516   // 0 - abs(x) -> smin(x, sub(0,x))
9517   if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9518       isOperationLegal(ISD::SMIN, VT)) {
9519     SDValue Zero = DAG.getConstant(0, dl, VT);
9520     Op = DAG.getFreeze(Op);
9521     return DAG.getNode(ISD::SMIN, dl, VT, Op,
9522                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9523   }
9524 
9525   // Only expand vector types if we have the appropriate vector operations.
9526   if (VT.isVector() &&
9527       (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9528        (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9529        (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9530        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9531     return SDValue();
9532 
9533   Op = DAG.getFreeze(Op);
9534   SDValue Shift = DAG.getNode(
9535       ISD::SRA, dl, VT, Op,
9536       DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9537   SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9538 
9539   // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9540   if (!IsNegative)
9541     return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9542 
9543   // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9544   return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9545 }
9546 
/// Expand ABDS/ABDU (signed/unsigned absolute difference) for \p N.
///
/// Tries, in order: max-min, a USUBSAT pair (unsigned only), abs(sub) when
/// the subtraction provably cannot overflow, a branchless setcc/xor form
/// when setcc yields all-bits booleans of the same type, a sign-extended
/// USUBO overflow form for illegal scalar types, unrolling for vectors
/// without VSELECT, and finally a compare+select.
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = DAG.getFreeze(N->getOperand(0));
  SDValue RHS = DAG.getFreeze(N->getOperand(1));
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
    return DAG.getNode(ISD::OR, dl, VT,
                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));

  // If the subtract doesn't overflow then just use abs(sub())
  // NOTE: don't use frozen operands for value tracking.
  bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
                       DAG.SignBitIsZero(N->getOperand(0));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
                             N->getOperand(1)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
                             N->getOperand(0)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));

  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
  }

  // Similar to the branchless expansion, use the (sign-extended) usubo overflow
  // flag if the (scalar) type is illegal as this is more likely to legalize
  // cleanly:
  // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue USubO =
        DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
    SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
}
9620 
/// Expand AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU (rounding averages) for
/// \p N.
///
/// Tries, in order: a plain add+shift when the operands are known to have a
/// spare high bit, widening to a double-width legal type for scalars, a
/// UADDO-based form for AVGFLOORU on illegal scalar types, and finally the
/// overflow-free and/or/xor/shift identity.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // (2 sign bits / 1 leading zero means lhs+rhs (+1) cannot overflow.)
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
       DAG.ComputeNumSignBits(RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
    if (!IsFloor)
      Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
    return DAG.getNode(ShiftOpc, dl, VT, Sum,
                       DAG.getShiftAmountConstant(1, VT, dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
    if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
      if (!IsFloor)
        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
                          DAG.getConstant(1, dl, ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
                        DAG.getShiftAmountConstant(1, ExtVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue UAddWithOverflow =
        DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(0);
    SDValue Overflow = UAddWithOverflow.getValue(1);

    // Right shift the sum by 1
    SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
                                  DAG.getShiftAmountConstant(1, VT, dl));

    // The overflow bit becomes the high bit of the result.
    SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
    SDValue OverflowShl = DAG.getNode(
        ISD::SHL, dl, VT, ZeroExtOverflow,
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));

    return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Freeze so both uses of each operand observe the same value.
  LHS = DAG.getFreeze(LHS);
  RHS = DAG.getFreeze(RHS);
  SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
  SDValue Shift =
      DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
  return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
}
9702 
/// Expand ISD::BSWAP into rotate (i16) or shift/and/or sequences (i32/i64).
/// Returns SDValue() for non-simple types or other scalar widths.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // Move each of the four bytes into place, then OR them together.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same pattern with eight bytes: isolate each byte (TmpN carries the
    // byte destined for position N), shift it into place, OR pairwise.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
9760 
/// Expand ISD::VP_BSWAP into masked shift/and/or sequences. Mirrors
/// expandBSWAP, but every node carries the VP Mask/EVL operands, and i16
/// uses an explicit shift pair since there is no VP rotate here.
/// Returns SDValue() for non-simple types or scalar widths other than
/// 16/32/64 bits.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Swap the two bytes with a shift pair ORed together.
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    // Move each of the four bytes into place, then OR them together.
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    // Same pattern with eight bytes: isolate each byte (TmpN carries the
    // byte destined for position N), shift it into place, OR pairwise.
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
9836 
/// Expand ISD::BITREVERSE. For power-of-2 scalar widths of at least 8 bits,
/// byte-swap first and then exchange nibbles, bit pairs and adjacent bits
/// with mask+shift steps. Other widths fall back to moving every bit
/// individually (O(width) nodes).
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Fallback: move bit I to position J = Sz-1-I, one bit at a time.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    // Keep only the bit that landed at position J, then accumulate.
    APInt Shift = APInt::getOneBitSet(Sz, J);
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
9897 
9898 SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9899   assert(N->getOpcode() == ISD::VP_BITREVERSE);
9900 
9901   SDLoc dl(N);
9902   EVT VT = N->getValueType(0);
9903   SDValue Op = N->getOperand(0);
9904   SDValue Mask = N->getOperand(1);
9905   SDValue EVL = N->getOperand(2);
9906   EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9907   unsigned Sz = VT.getScalarSizeInBits();
9908 
9909   SDValue Tmp, Tmp2, Tmp3;
9910 
9911   // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9912   // and finally the i1 pairs.
9913   // TODO: We can easily support i4/i2 legal types if any target ever does.
9914   if (Sz >= 8 && isPowerOf2_32(Sz)) {
9915     // Create the masks - repeating the pattern every byte.
9916     APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9917     APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9918     APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9919 
9920     // BSWAP if the type is wider than a single byte.
9921     Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9922 
9923     // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9924     Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9925                        Mask, EVL);
9926     Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9927                        DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9928     Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9929                        Mask, EVL);
9930     Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9931                        Mask, EVL);
9932     Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9933 
9934     // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9935     Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9936                        Mask, EVL);
9937     Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9938                        DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9939     Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9940                        Mask, EVL);
9941     Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9942                        Mask, EVL);
9943     Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9944 
9945     // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9946     Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9947                        Mask, EVL);
9948     Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9949                        DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9950     Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9951                        Mask, EVL);
9952     Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9953                        Mask, EVL);
9954     Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9955     return Tmp;
9956   }
9957   return SDValue();
9958 }
9959 
/// Replace a vector load with a form the target can handle: for byte-sized
/// elements, one scalar (ext)load per element joined by a TokenFactor; for
/// sub-byte elements, a single wide integer load whose bits are shifted and
/// masked apart. Returns {built vector value, new chain}.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();      // Type of the data in memory.
  EVT DstVT = LD->getValueType(0);    // Type of the value produced.
  ISD::LoadExtType ExtType = LD->getExtensionType();

  // Per-element decomposition needs a fixed element count.
  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    // LoadVT covers the full store size; SrcIntVT covers only the value bits.
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    // Mask selecting the low SrcEltBits of a shifted-down element.
    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 lives in the most-significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Re-apply the original load's extension semantics per element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    // The single wide load carries the only memory dependency.
    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one scalar load per element at increasing
  // offsets from the base pointer.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge the per-element chains; the loads may execute in any order.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
10049 
/// Replace a vector store with a form the target can handle: for byte-sized
/// elements, one truncating scalar store per element joined by a TokenFactor;
/// for sub-byte elements, the elements are packed into a single integer which
/// is stored in one go. Returns the resulting chain.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // Per-element decomposition needs a fixed element count.
  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    // OR each truncated element into CurrVal at its bit position.
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 lives in the most-significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // Store the packed integer with the original store's memory attributes.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // Merge the per-element chains; the stores may execute in any order.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
10125 }
10126 
/// Expand an unaligned, unindexed load into operations the target supports:
/// FP/vector loads become an integer load of the same size (bitcast back), a
/// scalarized load, or a bounce through an aligned stack slot; integer loads
/// are split into two half-width loads combined with shift+or. Returns
/// {loaded value, new chain}.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    // Integer type with the same bit width as the in-memory type.
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized chunks needed to cover the value (round up).
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low half is always zero-extended so
  // the OR below cannot corrupt the high half; endianness decides which half
  // sits at the lower address.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: (Hi << NumBits) | Lo
  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
10276 
/// Expand an unaligned, unindexed store into operations the target supports:
/// FP/vector stores become a bitcast + integer store, a scalarized store, or
/// a bounce through an aligned stack slot; integer stores are split into two
/// half-width truncating stores. Returns the resulting chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    // Integer type with the same bit width as the stored value.
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized chunks needed to cover the value (round up).
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts; endianness decides which half sits at the lower
  // address.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
10409 
10410 SDValue
10411 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10412                                        const SDLoc &DL, EVT DataVT,
10413                                        SelectionDAG &DAG,
10414                                        bool IsCompressedMemory) const {
10415   SDValue Increment;
10416   EVT AddrVT = Addr.getValueType();
10417   EVT MaskVT = Mask.getValueType();
10418   assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10419          "Incompatible types of Data and Mask");
10420   if (IsCompressedMemory) {
10421     if (DataVT.isScalableVector())
10422       report_fatal_error(
10423           "Cannot currently handle compressed memory with scalable vectors");
10424     // Incrementing the pointer according to number of '1's in the mask.
10425     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10426     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10427     if (MaskIntVT.getSizeInBits() < 32) {
10428       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10429       MaskIntVT = MVT::i32;
10430     }
10431 
10432     // Count '1's with POPCNT.
10433     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10434     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10435     // Scale is an element size in bytes.
10436     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10437                                     AddrVT);
10438     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10439   } else if (DataVT.isScalableVector()) {
10440     Increment = DAG.getVScale(DL, AddrVT,
10441                               APInt(AddrVT.getFixedSizeInBits(),
10442                                     DataVT.getStoreSize().getKnownMinValue()));
10443   } else
10444     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10445 
10446   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10447 }
10448 
10449 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10450                                        EVT VecVT, const SDLoc &dl,
10451                                        ElementCount SubEC) {
10452   assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10453          "Cannot index a scalable vector within a fixed-width vector");
10454 
10455   unsigned NElts = VecVT.getVectorMinNumElements();
10456   unsigned NumSubElts = SubEC.getKnownMinValue();
10457   EVT IdxVT = Idx.getValueType();
10458 
10459   if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10460     // If this is a constant index and we know the value plus the number of the
10461     // elements in the subvector minus one is less than the minimum number of
10462     // elements then it's safe to return Idx.
10463     if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10464       if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10465         return Idx;
10466     SDValue VS =
10467         DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10468     unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10469     SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10470                               DAG.getConstant(NumSubElts, dl, IdxVT));
10471     return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10472   }
10473   if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10474     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10475     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10476                        DAG.getConstant(Imm, dl, IdxVT));
10477   }
10478   unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10479   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10480                      DAG.getConstant(MaxIndex, dl, IdxVT));
10481 }
10482 
10483 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10484                                                 SDValue VecPtr, EVT VecVT,
10485                                                 SDValue Index) const {
10486   return getVectorSubVecPointer(
10487       DAG, VecPtr, VecVT,
10488       EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10489       Index);
10490 }
10491 
/// Compute the address of the SubVecVT-typed subvector of the vector at
/// VecPtr starting at element Index, clamping Index so the subvector stays in
/// bounds of VecVT.
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  // For a scalable subvector the byte offset scales with vscale as well, so
  // fold a (vscale * 1) factor into the index before the element-size
  // multiply.
  EVT IdxVT = Index.getValueType();
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  // Byte offset = (clamped, possibly vscale-scaled) index * element size.
  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
10521 
10522 //===----------------------------------------------------------------------===//
10523 // Implementation of Emulated TLS Model
10524 //===----------------------------------------------------------------------===//
10525 
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  // Look through pointer casts and aliases to find the underlying TLS global,
  // then find its matching "__emutls_v.<name>" control variable in the module.
  const GlobalValue *GV =
      cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  // The control variable's address is the single argument to
  // __emutls_get_address.
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Lower the call with C calling convention; the runtime returns the address
  // of the thread-local storage for this variable.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10565 
10566 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10567                                                 SelectionDAG &DAG) const {
10568   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10569   if (!isCtlzFast())
10570     return SDValue();
10571   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10572   SDLoc dl(Op);
10573   if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10574     EVT VT = Op.getOperand(0).getValueType();
10575     SDValue Zext = Op.getOperand(0);
10576     if (VT.bitsLT(MVT::i32)) {
10577       VT = MVT::i32;
10578       Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10579     }
10580     unsigned Log2b = Log2_32(VT.getSizeInBits());
10581     SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10582     SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10583                               DAG.getConstant(Log2b, dl, MVT::i32));
10584     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10585   }
10586   return SDValue();
10587 }
10588 
// Expand an integer SMIN/SMAX/UMIN/UMAX node into compare+select, preferring
// a few cheaper special-case forms when they are legal for the type.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    // Op0 is used twice below (in the sub and the setcc); freeze so both uses
    // observe the same value even if Op0 is poison/undef.
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  // buildMinMax emits setcc+select. It first scans for an already-existing
  // SETCC on (Op0, Op1) with a usable condition code (preferred, then the
  // non-strict alternative, then the two commuted forms) so CSE can kick in;
  // only if none exists does it create a fresh SETCC with PrefCC.
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        // Commuted condition: swap the select operands to compensate.
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10669 
// Expand [SU][ADD|SUB]SAT. Tries cheap min/max-based forms first, then falls
// back to the overflow-op form: compute the sum/difference plus an overflow
// flag, and select the saturation constant when overflow occurred.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to the corresponding overflow-producing opcode.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // If the boolean is all-ones on true, the sign-extended overflow flag is
      // already the saturation mask:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Unsigned subtraction saturates to zero, so AND with the inverted mask:
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // Signed case with unknown operand signs. On overflow the saturation value
  // has the opposite sign of the (wrapped) result, so derive it from the
  // result's sign bit:
  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10785 
10786 SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10787   unsigned Opcode = Node->getOpcode();
10788   SDValue LHS = Node->getOperand(0);
10789   SDValue RHS = Node->getOperand(1);
10790   EVT VT = LHS.getValueType();
10791   EVT ResVT = Node->getValueType(0);
10792   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10793   SDLoc dl(Node);
10794 
10795   auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10796   auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10797   SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10798   SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10799 
10800   // We can't perform arithmetic on i1 values. Extending them would
10801   // probably result in worse codegen, so let's just use two selects instead.
10802   // Some targets are also just better off using selects rather than subtraction
10803   // because one of the conditions can be merged with one of the selects.
10804   // And finally, if we don't know the contents of high bits of a boolean value
10805   // we can't perform any arithmetic either.
10806   if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10807       getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10808     SDValue SelectZeroOrOne =
10809         DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10810                       DAG.getConstant(0, dl, ResVT));
10811     return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10812                          SelectZeroOrOne);
10813   }
10814 
10815   if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10816     std::swap(IsGT, IsLT);
10817   return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10818                             ResVT);
10819 }
10820 
10821 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10822   unsigned Opcode = Node->getOpcode();
10823   bool IsSigned = Opcode == ISD::SSHLSAT;
10824   SDValue LHS = Node->getOperand(0);
10825   SDValue RHS = Node->getOperand(1);
10826   EVT VT = LHS.getValueType();
10827   SDLoc dl(Node);
10828 
10829   assert((Node->getOpcode() == ISD::SSHLSAT ||
10830           Node->getOpcode() == ISD::USHLSAT) &&
10831           "Expected a SHLSAT opcode");
10832   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10833   assert(VT.isInteger() && "Expected operands to be integers");
10834 
10835   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10836     return DAG.UnrollVectorOp(Node);
10837 
10838   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10839 
10840   unsigned BW = VT.getScalarSizeInBits();
10841   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10842   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10843   SDValue Orig =
10844       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10845 
10846   SDValue SatVal;
10847   if (IsSigned) {
10848     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10849     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10850     SDValue Cond =
10851         DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10852     SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10853   } else {
10854     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10855   }
10856   SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10857   return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10858 }
10859 
10860 void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
10861                                          bool Signed, SDValue &Lo, SDValue &Hi,
10862                                          SDValue LHS, SDValue RHS,
10863                                          SDValue HiLHS, SDValue HiRHS) const {
10864   EVT VT = LHS.getValueType();
10865   assert(RHS.getValueType() == VT && "Mismatching operand types");
10866 
10867   assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
10868   assert((!Signed || !HiLHS) &&
10869          "Signed flag should only be set when HiLHS and RiRHS are null");
10870 
10871   // We'll expand the multiplication by brute force because we have no other
10872   // options. This is a trivially-generalized version of the code from
10873   // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10874   // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
10875   // sign bits while calculating the Hi half.
10876   unsigned Bits = VT.getSizeInBits();
10877   unsigned HalfBits = Bits / 2;
10878   SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10879   SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
10880   SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
10881 
10882   SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
10883   SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10884 
10885   SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10886   // This is always an unsigned shift.
10887   SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10888 
10889   unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
10890   SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
10891   SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
10892 
10893   SDValue U =
10894       DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
10895   SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10896   SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
10897 
10898   SDValue V =
10899       DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
10900   SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
10901 
10902   Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10903                    DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10904 
10905   Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
10906                    DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10907 
10908   // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
10909   // the products to Hi.
10910   if (HiLHS) {
10911     Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
10912                      DAG.getNode(ISD::ADD, dl, VT,
10913                                  DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
10914                                  DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
10915   }
10916 }
10917 
// Produce the full double-width product of LHS*RHS in Lo/Hi, preferring a
// MUL_I* libcall on the double-width type and falling back to a brute-force
// expansion (forceExpandMultiply) when no suitable libcall exists.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, const SDValue LHS,
                                        const SDValue RHS, SDValue &Lo,
                                        SDValue &Hi) const {
  EVT VT = LHS.getValueType();
  assert(RHS.getValueType() == VT && "Mismatching operand types");
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  // No usable libcall for this width: expand inline instead.
  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
    return;
  }

  // Build the high halves of the sign-/zero-extended operands for the
  // double-width libcall arguments.
  SDValue HiLHS, HiRHS;
  if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of low
    // part.
    unsigned LoSize = VT.getFixedSizeInBits();
    SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
    HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
    HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
  } else {
    HiLHS = DAG.getConstant(0, dl, VT);
    HiRHS = DAG.getConstant(0, dl, VT);
  }

  // Attempt a libcall.
  SDValue Ret;
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setIsSigned(Signed);
  CallOptions.setIsPostTypeLegalization(true);
  if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
    // Halves of WideVT are packed into registers in different order
    // depending on platform endianness. This is usually handled by
    // the C calling convention, but we can't defer to it in
    // the legalizer.
    SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
    Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
  } else {
    SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
    Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
  }
  assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
         "Ret value is a collection of constituent nodes holding result.");
  // Unpack the two result halves; their order also depends on endianness.
  if (DAG.getDataLayout().isLittleEndian()) {
    // Same as above.
    Lo = Ret.getOperand(0);
    Hi = Ret.getOperand(1);
  } else {
    Lo = Ret.getOperand(1);
    Hi = Ret.getOperand(0);
  }
}
10982 
// Expand a fixed-point multiplication ([US]MULFIX[SAT]) with scale operand:
// the mathematical result is (LHS * RHS) >> Scale, optionally saturated.
// Computes the double-width product (via MUL_LOHI / MULH / widened MUL /
// forced expansion), shifts by Scale, and applies saturation clamping for the
// SAT variants. Returns SDValue() if no expansion strategy applies.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  // Scale of zero degenerates into a plain (possibly saturating) multiply.
  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // smul.fix.sat(a, b, 0) via SMULO: on overflow, pick SatMin/SatMax
      // based on the expected sign of the true product.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // umul.fix.sat(a, b, 0) via UMULO: saturate to all-ones on overflow.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    // No strategy for vectors without LOHI/MULH/wide MUL support.
    return SDValue();
  } else {
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no scale, overflow is simply "Hi is not the sign-extension of Lo".
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
11139 
// Expand a fixed-point division ([US]DIVFIX[SAT]) with the given scale: the
// mathematical result is (LHS << Scale) / RHS, rounded towards negative
// infinity for signed ops. Succeeds only when there is enough headroom in VT
// to pre-shift the operands without losing bits; otherwise returns SDValue()
// so the caller can retry in a wider type.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer shifting the LHS up; shift the RHS down only for whatever part of
  // Scale the LHS headroom cannot cover.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    // The quotient is negative exactly when the operand signs differ.
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
11225 
11226 void TargetLowering::expandUADDSUBO(
11227     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11228   SDLoc dl(Node);
11229   SDValue LHS = Node->getOperand(0);
11230   SDValue RHS = Node->getOperand(1);
11231   bool IsAdd = Node->getOpcode() == ISD::UADDO;
11232 
11233   // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11234   unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11235   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11236     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11237     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11238                                     { LHS, RHS, CarryIn });
11239     Result = SDValue(NodeCarry.getNode(), 0);
11240     Overflow = SDValue(NodeCarry.getNode(), 1);
11241     return;
11242   }
11243 
11244   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11245                             LHS.getValueType(), LHS, RHS);
11246 
11247   EVT ResultType = Node->getValueType(1);
11248   EVT SetCCType = getSetCCResultType(
11249       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11250   SDValue SetCC;
11251   if (IsAdd && isOneConstant(RHS)) {
11252     // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11253     // the live range of X. We assume comparing with 0 is cheap.
11254     // The general case (X + C) < C is not necessarily beneficial. Although we
11255     // reduce the live range of X, we may introduce the materialization of
11256     // constant C.
11257     SetCC =
11258         DAG.getSetCC(dl, SetCCType, Result,
11259                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11260   } else if (IsAdd && isAllOnesConstant(RHS)) {
11261     // Special case: uaddo X, -1 overflows if X != 0.
11262     SetCC =
11263         DAG.getSetCC(dl, SetCCType, LHS,
11264                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11265   } else {
11266     ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11267     SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11268   }
11269   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11270 }
11271 
11272 void TargetLowering::expandSADDSUBO(
11273     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11274   SDLoc dl(Node);
11275   SDValue LHS = Node->getOperand(0);
11276   SDValue RHS = Node->getOperand(1);
11277   bool IsAdd = Node->getOpcode() == ISD::SADDO;
11278 
11279   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11280                             LHS.getValueType(), LHS, RHS);
11281 
11282   EVT ResultType = Node->getValueType(1);
11283   EVT OType = getSetCCResultType(
11284       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11285 
11286   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11287   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11288   if (isOperationLegal(OpcSat, LHS.getValueType())) {
11289     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11290     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11291     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11292     return;
11293   }
11294 
11295   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11296 
11297   // For an addition, the result should be less than one of the operands (LHS)
11298   // if and only if the other operand (RHS) is negative, otherwise there will
11299   // be overflow.
11300   // For a subtraction, the result should be less than one of the operands
11301   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11302   // otherwise there will be overflow.
11303   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11304   SDValue ConditionRHS =
11305       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11306 
11307   Overflow = DAG.getBoolExtOrTrunc(
11308       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11309       ResultType, ResultType);
11310 }
11311 
// Expand [SU]MULO into a multiply that also yields the high half of the
// product, then derive the overflow bit from the high half. Returns false
// only when no suitable expansion exists (vector types with none of the
// needed operations available).
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow occurred iff shifting the result back does not recover LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type used by the widening fallback below.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Opcode triples indexed by signedness: {high-half multiply, combined
  // lo/hi multiply, extension used when widening}.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    // MULHU/MULHS plus a plain MUL give the two halves directly.
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    // UMUL_LOHI/SMUL_LOHI produce both halves from a single node.
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Widen the operands, multiply once, then split the double-width product.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    // Last resort (scalar only): force-expand the wide multiply.
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11391 
11392 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11393   SDLoc dl(Node);
11394   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11395   SDValue Op = Node->getOperand(0);
11396   EVT VT = Op.getValueType();
11397 
11398   if (VT.isScalableVector())
11399     report_fatal_error(
11400         "Expanding reductions for scalable vectors is undefined.");
11401 
11402   // Try to use a shuffle reduction for power of two vectors.
11403   if (VT.isPow2VectorType()) {
11404     while (VT.getVectorNumElements() > 1) {
11405       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11406       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11407         break;
11408 
11409       SDValue Lo, Hi;
11410       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11411       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11412       VT = HalfVT;
11413     }
11414   }
11415 
11416   EVT EltVT = VT.getVectorElementType();
11417   unsigned NumElts = VT.getVectorNumElements();
11418 
11419   SmallVector<SDValue, 8> Ops;
11420   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11421 
11422   SDValue Res = Ops[0];
11423   for (unsigned i = 1; i < NumElts; i++)
11424     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11425 
11426   // Result type may be wider than element type.
11427   if (EltVT != Node->getValueType(0))
11428     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11429   return Res;
11430 }
11431 
11432 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11433   SDLoc dl(Node);
11434   SDValue AccOp = Node->getOperand(0);
11435   SDValue VecOp = Node->getOperand(1);
11436   SDNodeFlags Flags = Node->getFlags();
11437 
11438   EVT VT = VecOp.getValueType();
11439   EVT EltVT = VT.getVectorElementType();
11440 
11441   if (VT.isScalableVector())
11442     report_fatal_error(
11443         "Expanding reductions for scalable vectors is undefined.");
11444 
11445   unsigned NumElts = VT.getVectorNumElements();
11446 
11447   SmallVector<SDValue, 8> Ops;
11448   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11449 
11450   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11451 
11452   SDValue Res = AccOp;
11453   for (unsigned i = 0; i < NumElts; i++)
11454     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11455 
11456   return Res;
11457 }
11458 
11459 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11460                                SelectionDAG &DAG) const {
11461   EVT VT = Node->getValueType(0);
11462   SDLoc dl(Node);
11463   bool isSigned = Node->getOpcode() == ISD::SREM;
11464   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11465   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11466   SDValue Dividend = Node->getOperand(0);
11467   SDValue Divisor = Node->getOperand(1);
11468   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11469     SDVTList VTs = DAG.getVTList(VT, VT);
11470     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11471     return true;
11472   }
11473   if (isOperationLegalOrCustom(DivOpc, VT)) {
11474     // X % Y -> X-X/Y*Y
11475     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11476     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11477     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11478     return true;
11479   }
11480   return false;
11481 }
11482 
// Expand FP_TO_SINT_SAT/FP_TO_UINT_SAT: convert Src to DstVT, saturating to
// the signed/unsigned integer range implied by the SatVT operand's width,
// with NaN inputs mapped to zero.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Convert the integer bounds to floating-point values (rounding toward
  // zero) and note whether both conversions were exact.
  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  // Fallback path: compare-and-select against the integer bounds.
  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
11592 
/// Narrow Op to ResultVT using "round to odd": when the narrowing is inexact,
/// force the low mantissa bit of the result to 1, so that a subsequent
/// rounding step cannot suffer from double rounding. The sign is handled
/// separately on the absolute value and re-attached at the end.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT WideIntVT = OperandVT.changeTypeToInteger();
  SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
  // Extract the sign bit so the rounding below can work on |Op|.
  SDValue SignBit =
      DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
                  DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
  SDValue AbsWide;
  if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
    AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  } else {
    // No FABS available: clear the sign bit with integer masking instead.
    SDValue ClearedSign = DAG.getNode(
        ISD::AND, dl, WideIntVT, OpAsInt,
        DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
    AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
  }
  // Round |Op| to the narrow type, then widen it back to compare for
  // exactness.
  SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
  SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       AbsWide.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ holds when the two values are equal or either one is NaN, which
  // covers both the "exact" and "NaN" keep cases in one compare.
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  // Re-attach the original sign: shift it down into the narrow type's sign
  // position and OR it into the integer result bits.
  int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
  SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
  SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
  SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
  Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
11660 
/// Expand FP_ROUND when the result is bf16: round through f32 with
/// round-to-odd (to avoid double rounding), then round to nearest-even using
/// integer arithmetic on the f32 bit pattern. Returns an empty SDValue for
/// result types this expansion does not handle.
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // Operand 1 == 1 marks the FP_ROUND as value-preserving, so lower it
    // directly to FP_TO_BF16.
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    // Detect NaN inputs up front; they take a dedicated path below.
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    // Round-to-nearest-even bias: 0x7fff plus the LSB of the kept bits.
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    // Bitcast to I32 is a no-op here (Op already has type I32).
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  return SDValue();
}
11714 
/// Expand ISD::VECTOR_SPLICE for scalable vectors by writing both source
/// vectors to a stack slot (as if concatenated) and re-loading the result
/// from a byte offset derived from the splice immediate.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // The stack slot holds both source vectors back to back.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // V2 starts sizeof(V1) bytes in; for a scalable vector that offset is a
  // multiple of vscale, hence the getVScale node.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  // Negative immediate: the result starts TrailingElts elements before the
  // end of V1.
  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
11788 
11789 SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11790                                               SelectionDAG &DAG) const {
11791   SDLoc DL(Node);
11792   SDValue Vec = Node->getOperand(0);
11793   SDValue Mask = Node->getOperand(1);
11794   SDValue Passthru = Node->getOperand(2);
11795 
11796   EVT VecVT = Vec.getValueType();
11797   EVT ScalarVT = VecVT.getScalarType();
11798   EVT MaskVT = Mask.getValueType();
11799   EVT MaskScalarVT = MaskVT.getScalarType();
11800 
11801   // Needs to be handled by targets that have scalable vector types.
11802   if (VecVT.isScalableVector())
11803     report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11804 
11805   SDValue StackPtr = DAG.CreateStackTemporary(
11806       VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11807   int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11808   MachinePointerInfo PtrInfo =
11809       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11810 
11811   MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11812   SDValue Chain = DAG.getEntryNode();
11813   SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11814 
11815   bool HasPassthru = !Passthru.isUndef();
11816 
11817   // If we have a passthru vector, store it on the stack, overwrite the matching
11818   // positions and then re-write the last element that was potentially
11819   // overwritten even though mask[i] = false.
11820   if (HasPassthru)
11821     Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11822 
11823   SDValue LastWriteVal;
11824   APInt PassthruSplatVal;
11825   bool IsSplatPassthru =
11826       ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11827 
11828   if (IsSplatPassthru) {
11829     // As we do not know which position we wrote to last, we cannot simply
11830     // access that index from the passthru vector. So we first check if passthru
11831     // is a splat vector, to use any element ...
11832     LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11833   } else if (HasPassthru) {
11834     // ... if it is not a splat vector, we need to get the passthru value at
11835     // position = popcount(mask) and re-load it from the stack before it is
11836     // overwritten in the loop below.
11837     EVT PopcountVT = ScalarVT.changeTypeToInteger();
11838     SDValue Popcount = DAG.getNode(
11839         ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11840     Popcount =
11841         DAG.getNode(ISD::ZERO_EXTEND, DL,
11842                     MaskVT.changeVectorElementType(PopcountVT), Popcount);
11843     Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11844     SDValue LastElmtPtr =
11845         getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11846     LastWriteVal = DAG.getLoad(
11847         ScalarVT, DL, Chain, LastElmtPtr,
11848         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11849     Chain = LastWriteVal.getValue(1);
11850   }
11851 
11852   unsigned NumElms = VecVT.getVectorNumElements();
11853   for (unsigned I = 0; I < NumElms; I++) {
11854     SDValue Idx = DAG.getVectorIdxConstant(I, DL);
11855 
11856     SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11857     SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11858     Chain = DAG.getStore(
11859         Chain, DL, ValI, OutPtr,
11860         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11861 
11862     // Get the mask value and add it to the current output position. This
11863     // either increments by 1 if MaskI is true or adds 0 otherwise.
11864     // Freeze in case we have poison/undef mask entries.
11865     SDValue MaskI = DAG.getFreeze(
11866         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11867     MaskI = DAG.getFreeze(MaskI);
11868     MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11869     MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11870     OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11871 
11872     if (HasPassthru && I == NumElms - 1) {
11873       SDValue EndOfVector =
11874           DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11875       SDValue AllLanesSelected =
11876           DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11877       OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11878       OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11879 
11880       // Re-write the last ValI if all lanes were selected. Otherwise,
11881       // overwrite the last write it with the passthru value.
11882       LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11883                                    LastWriteVal, SDNodeFlags::Unpredictable);
11884       Chain = DAG.getStore(
11885           Chain, DL, LastWriteVal, OutPtr,
11886           MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11887     }
11888   }
11889 
11890   return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11891 }
11892 
/// Legalize the condition code of a SETCC (or VP_SETCC when \p Mask/\p EVL are
/// set) whose condition \p CCCode is not natively supported for operand type
/// \p OpVT. On success, rewrites \p LHS, \p RHS and \p CC in place and returns
/// true; the caller must rebuild the node from the updated operands (and, if
/// \p NeedInvert was set, logically negate the result). Returns false when the
/// condition code is already Legal and nothing was changed.
///
/// \param VT          Result type of the comparison being legalized.
/// \param Mask / EVL  VP operands; both set or both unset (asserted below).
/// \param NeedInvert  [out] true if the caller must invert the final result.
/// \param Chain       [in/out] chain for strict FP comparisons; updated when
///                    the expansion emits chained setcc nodes.
/// \param IsSignaling Whether a strict FP comparison should signal on QNaN.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First, try to express the condition via a legal code by swapping the
    // operands (e.g. SETLT(a,b) == SETGT(b,a)).
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // Emit the inverted comparison; the caller negates the result because
      // NeedInvert is set.
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Special case: expand i1 comparisons using logical operations.
    if (OpVT == MVT::i1) {
      SDValue Ret;
      switch (CCCode) {
      default:
        llvm_unreachable("Unknown integer setcc!");
      case ISD::SETEQ: // X == Y  -->  ~(X ^ Y)
        Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
                         MVT::i1);
        break;
      case ISD::SETNE: // X != Y  -->  (X ^ Y)
        Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
        break;
      case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
      case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
      case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
      case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
      case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      }

      LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
      RHS = SDValue();
      CC = SDValue();
      return true;
    }

    // General case: expand the comparison into two simpler comparisons
    // (LHS CC1 ?) Opc (? CC2 ?) combined with AND/OR.
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
        // isnan(L) || isnan(R)  -->  L != L || R != R (unordered-compare).
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      // NOTE(review): message says "SETUE" but this path handles SETUO.
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      // Expand SETUO as !SETO: fall through and let the caller invert.
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      // ordered(L, R)  -->  L == L && R == R.
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      // Bit 3 (0x8) of the ISD::CondCode encoding distinguishes the
      // unordered from the ordered flavor of a comparison.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
                                          isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // Clearing the high bits and setting 0x10 yields the "don't care
        // about ordering" (plain) version of the same relation.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    // Materialize the two sub-comparisons chosen above, using VP nodes when
    // a mask/EVL pair was supplied.
    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // For strict FP (Chain set), merge the chains of both sub-comparisons.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    // The combined result lives in LHS; clear RHS/CC so the caller knows the
    // comparison has been fully expanded.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
12085 
12086 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12087                                                       SelectionDAG &DAG) const {
12088   EVT VT = Node->getValueType(0);
12089   // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12090   // split into two equal parts.
12091   if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12092     return SDValue();
12093 
12094   // Restrict expansion to cases where both parts can be concatenated.
12095   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12096   if (LoVT != HiVT || !isTypeLegal(LoVT))
12097     return SDValue();
12098 
12099   SDLoc DL(Node);
12100   unsigned Opcode = Node->getOpcode();
12101 
12102   // Don't expand if the result is likely to be unrolled anyway.
12103   if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12104     return SDValue();
12105 
12106   SmallVector<SDValue, 4> LoOps, HiOps;
12107   for (const SDValue &V : Node->op_values()) {
12108     auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12109     LoOps.push_back(Lo);
12110     HiOps.push_back(Hi);
12111   }
12112 
12113   SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12114   SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12115   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12116 }
12117