xref: /llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (revision f582cd3dc70fa8c9519f74f16ab0a33ad663038e)
1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Analysis/VectorUtils.h"
16 #include "llvm/CodeGen/CallingConvLower.h"
17 #include "llvm/CodeGen/CodeGenCommonISel.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineJumpTableInfo.h"
21 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/TargetRegisterInfo.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/Support/DivisionByConstantInfo.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include <cctype>
37 using namespace llvm;
38 
/// Construct a TargetLowering bound to the given TargetMachine; all state
/// lives in the TargetLoweringBase subobject.
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42 
/// Return a human-readable name for the given target-specific DAG opcode, or
/// nullptr when the opcode is unknown. This base implementation knows no
/// target nodes; targets are expected to override it to label their opcodes
/// in debug dumps.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46 
47 bool TargetLowering::isPositionIndependent() const {
48   return getTargetMachine().isPositionIndependent();
49 }
50 
51 /// Check whether a given call node is in tail position within its function. If
52 /// so, it sets Chain to the input chain of the tail call.
53 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54                                           SDValue &Chain) const {
55   const Function &F = DAG.getMachineFunction().getFunction();
56 
57   // First, check if tail calls have been disabled in this function.
58   if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59     return false;
60 
61   // Conservatively require the attributes of the call to match those of
62   // the return. Ignore following attributes because they don't affect the
63   // call sequence.
64   AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65   for (const auto &Attr :
66        {Attribute::Alignment, Attribute::Dereferenceable,
67         Attribute::DereferenceableOrNull, Attribute::NoAlias,
68         Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
69     CallerAttrs.removeAttribute(Attr);
70 
71   if (CallerAttrs.hasAttributes())
72     return false;
73 
74   // It's not safe to eliminate the sign / zero extension of the return value.
75   if (CallerAttrs.contains(Attribute::ZExt) ||
76       CallerAttrs.contains(Attribute::SExt))
77     return false;
78 
79   // Check if the only use is a function return node.
80   return isUsedByReturnOnly(Node, Chain);
81 }
82 
83 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
84     const uint32_t *CallerPreservedMask,
85     const SmallVectorImpl<CCValAssign> &ArgLocs,
86     const SmallVectorImpl<SDValue> &OutVals) const {
87   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
88     const CCValAssign &ArgLoc = ArgLocs[I];
89     if (!ArgLoc.isRegLoc())
90       continue;
91     MCRegister Reg = ArgLoc.getLocReg();
92     // Only look at callee saved registers.
93     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94       continue;
95     // Check that we pass the value used for the caller.
96     // (We look for a CopyFromReg reading a virtual register that is used
97     //  for the function live-in value of register Reg)
98     SDValue Value = OutVals[I];
99     if (Value->getOpcode() == ISD::AssertZext)
100       Value = Value.getOperand(0);
101     if (Value->getOpcode() != ISD::CopyFromReg)
102       return false;
103     Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105       return false;
106   }
107   return true;
108 }
109 
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
/// Mirrors each relevant parameter attribute of argument ArgIdx at the call
/// site into the corresponding ArgListEntry flag, and records the pointee
/// type for indirect-passing attributes (byval/preallocated/inalloca/sret).
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  // Explicit stack alignment requested at the call site, if any.
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // The four indirect-passing ABI attributes are mutually exclusive.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // Fall back to the parameter alignment when no stack alignment is given.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
143 
144 /// Generate a libcall taking the given operands as arguments and returning a
145 /// result of type RetVT.
146 std::pair<SDValue, SDValue>
147 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
148                             ArrayRef<SDValue> Ops,
149                             MakeLibCallOptions CallOptions,
150                             const SDLoc &dl,
151                             SDValue InChain) const {
152   if (!InChain)
153     InChain = DAG.getEntryNode();
154 
155   TargetLowering::ArgListTy Args;
156   Args.reserve(Ops.size());
157 
158   TargetLowering::ArgListEntry Entry;
159   for (unsigned i = 0; i < Ops.size(); ++i) {
160     SDValue NewOp = Ops[i];
161     Entry.Node = NewOp;
162     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
163     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
164                                                  CallOptions.IsSExt);
165     Entry.IsZExt = !Entry.IsSExt;
166 
167     if (CallOptions.IsSoften &&
168         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
169       Entry.IsSExt = Entry.IsZExt = false;
170     }
171     Args.push_back(Entry);
172   }
173 
174   if (LC == RTLIB::UNKNOWN_LIBCALL)
175     report_fatal_error("Unsupported library call operation!");
176   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
177                                          getPointerTy(DAG.getDataLayout()));
178 
179   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
180   TargetLowering::CallLoweringInfo CLI(DAG);
181   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
182   bool zeroExtend = !signExtend;
183 
184   if (CallOptions.IsSoften &&
185       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
186     signExtend = zeroExtend = false;
187   }
188 
189   CLI.setDebugLoc(dl)
190       .setChain(InChain)
191       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
192       .setNoReturn(CallOptions.DoesNotReturn)
193       .setDiscardResult(!CallOptions.IsReturnValueUsed)
194       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
195       .setSExtResult(signExtend)
196       .setZExtResult(zeroExtend);
197   return LowerCallTo(CLI);
198 }
199 
/// Choose the sequence of value types (appended to MemOps) used to lower a
/// memory operation Op, returning false if more than Limit operations would
/// be required or if the fixed destination alignment exceeds the source
/// alignment for a memcpy.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // A limited memcpy whose source is less aligned than its fixed destination
  // cannot be handled here.
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type first; MVT::Other means "no
  // preference", in which case we search for one below.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily cover the remaining Size bytes, shrinking VT whenever it no
  // longer fits in what is left.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the integer types until one is safe, bottoming out at i8.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    // Respect the operation-count budget.
    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
289 
290 /// Soften the operands of a comparison. This code is shared among BR_CC,
291 /// SELECT_CC, and SETCC handlers.
292 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
293                                          SDValue &NewLHS, SDValue &NewRHS,
294                                          ISD::CondCode &CCCode,
295                                          const SDLoc &dl, const SDValue OldLHS,
296                                          const SDValue OldRHS) const {
297   SDValue Chain;
298   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
299                              OldRHS, Chain);
300 }
301 
/// Soften the operands of a floating-point comparison into one or two
/// soft-float libcalls. On return, NewLHS/NewRHS/CCCode describe the
/// equivalent integer comparison (NewRHS may be null when the result is
/// already a boolean), and Chain is updated with the libcall chain(s).
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.
  // NOTE(review): IsSignaling is not consulted anywhere in this body —
  // confirm whether signaling comparisons require distinct handling here.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC1 is the primary
  // comparison libcall; LC2 is set only for predicates that must combine two
  // libcall results (SETONE / SETUEQ). ShouldInvertCC means the libcall
  // computes the inverse predicate and its result condition must be flipped.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ, combined from two libcalls below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  // Record pre-softening operand types so makeLibCall can pick the right
  // extension behavior for the original FP types.
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  // The softened comparison becomes "libcall-result <CC> 0".
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-libcall predicates: evaluate both comparisons and merge them with
    // AND (for the inverted SETONE form) or OR (for SETUEQ).
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // Join the two call chains when the caller tracks a chain.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // The merged result is already a boolean; there is no RHS to compare.
    NewRHS = SDValue();
  }
}
441 
442 /// Return the entry encoding for a jump table in the current function. The
443 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
444 unsigned TargetLowering::getJumpTableEncoding() const {
445   // In non-pic modes, just use the address of a block.
446   if (!isPositionIndependent())
447     return MachineJumpTableInfo::EK_BlockAddress;
448 
449   // In PIC mode, if the target supports a GPRel32 directive, use it.
450   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
451     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
452 
453   // Otherwise, use a label difference.
454   return MachineJumpTableInfo::EK_LabelDifference32;
455 }
456 
457 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
458                                                  SelectionDAG &DAG) const {
459   // If our PIC model is GP relative, use the global offset table as the base.
460   unsigned JTEncoding = getJumpTableEncoding();
461 
462   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
463       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
464     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
465 
466   return Table;
467 }
468 
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
477 
478 SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
479                                                SDValue Addr, int JTI,
480                                                SelectionDAG &DAG) const {
481   SDValue Chain = Value;
482   // Jump table debug info is only needed if CodeView is enabled.
483   if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
484     Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
485   }
486   return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
487 }
488 
489 bool
490 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
491   const TargetMachine &TM = getTargetMachine();
492   const GlobalValue *GV = GA->getGlobal();
493 
494   // If the address is not even local to this DSO we will have to load it from
495   // a got and then add the offset.
496   if (!TM.shouldAssumeDSOLocal(GV))
497     return false;
498 
499   // If the code is position independent we will have to add a base register.
500   if (isPositionIndependent())
501     return false;
502 
503   // Otherwise we can do it.
504   return true;
505 }
506 
507 //===----------------------------------------------------------------------===//
508 //  Optimization Methods
509 //===----------------------------------------------------------------------===//
510 
511 /// If the specified instruction has a constant integer operand and there are
512 /// bits set in that constant that are not demanded, then clear those bits and
513 /// return true.
514 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
515                                             const APInt &DemandedBits,
516                                             const APInt &DemandedElts,
517                                             TargetLoweringOpt &TLO) const {
518   SDLoc DL(Op);
519   unsigned Opcode = Op.getOpcode();
520 
521   // Early-out if we've ended up calling an undemanded node, leave this to
522   // constant folding.
523   if (DemandedBits.isZero() || DemandedElts.isZero())
524     return false;
525 
526   // Do target-specific constant optimization.
527   if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
528     return TLO.New.getNode();
529 
530   // FIXME: ISD::SELECT, ISD::SELECT_CC
531   switch (Opcode) {
532   default:
533     break;
534   case ISD::XOR:
535   case ISD::AND:
536   case ISD::OR: {
537     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
538     if (!Op1C || Op1C->isOpaque())
539       return false;
540 
541     // If this is a 'not' op, don't touch it because that's a canonical form.
542     const APInt &C = Op1C->getAPIntValue();
543     if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
544       return false;
545 
546     if (!C.isSubsetOf(DemandedBits)) {
547       EVT VT = Op.getValueType();
548       SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
549       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
550                                       Op->getFlags());
551       return TLO.CombineTo(Op, NewOp);
552     }
553 
554     break;
555   }
556   }
557 
558   return false;
559 }
560 
561 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
562                                             const APInt &DemandedBits,
563                                             TargetLoweringOpt &TLO) const {
564   EVT VT = Op.getValueType();
565   APInt DemandedElts = VT.isVector()
566                            ? APInt::getAllOnes(VT.getVectorNumElements())
567                            : APInt(1, 1);
568   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
569 }
570 
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.
      // Rebuild the operation on truncated operands, then widen the result
      // back with ANY_EXTEND (the demanded bits are preserved).
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
619 
620 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
621                                           DAGCombinerInfo &DCI) const {
622   SelectionDAG &DAG = DCI.DAG;
623   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
624                         !DCI.isBeforeLegalizeOps());
625   KnownBits Known;
626 
627   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
628   if (Simplified) {
629     DCI.AddToWorklist(Op.getNode());
630     DCI.CommitTargetLoweringOpt(TLO);
631   }
632   return Simplified;
633 }
634 
635 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
636                                           const APInt &DemandedElts,
637                                           DAGCombinerInfo &DCI) const {
638   SelectionDAG &DAG = DCI.DAG;
639   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
640                         !DCI.isBeforeLegalizeOps());
641   KnownBits Known;
642 
643   bool Simplified =
644       SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
645   if (Simplified) {
646     DCI.AddToWorklist(Op.getNode());
647     DCI.CommitTargetLoweringOpt(TLO);
648   }
649   return Simplified;
650 }
651 
/// Overload that derives the demanded-elements mask from Op's type before
/// delegating to the full implementation.
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes.  This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}
668 
669 // TODO: Under what circumstances can we create nodes? Constant folding?
670 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
671     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
672     SelectionDAG &DAG, unsigned Depth) const {
673   EVT VT = Op.getValueType();
674 
675   // Limit search depth.
676   if (Depth >= SelectionDAG::MaxRecursionDepth)
677     return SDValue();
678 
679   // Ignore UNDEFs.
680   if (Op.isUndef())
681     return SDValue();
682 
683   // Not demanding any bits/elts from Op.
684   if (DemandedBits == 0 || DemandedElts == 0)
685     return DAG.getUNDEF(VT);
686 
687   bool IsLE = DAG.getDataLayout().isLittleEndian();
688   unsigned NumElts = DemandedElts.getBitWidth();
689   unsigned BitWidth = DemandedBits.getBitWidth();
690   KnownBits LHSKnown, RHSKnown;
691   switch (Op.getOpcode()) {
692   case ISD::BITCAST: {
693     if (VT.isScalableVector())
694       return SDValue();
695 
696     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
697     EVT SrcVT = Src.getValueType();
698     EVT DstVT = Op.getValueType();
699     if (SrcVT == DstVT)
700       return Src;
701 
702     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
703     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
704     if (NumSrcEltBits == NumDstEltBits)
705       if (SDValue V = SimplifyMultipleUseDemandedBits(
706               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
707         return DAG.getBitcast(DstVT, V);
708 
709     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
710       unsigned Scale = NumDstEltBits / NumSrcEltBits;
711       unsigned NumSrcElts = SrcVT.getVectorNumElements();
712       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
713       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
714       for (unsigned i = 0; i != Scale; ++i) {
715         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
716         unsigned BitOffset = EltOffset * NumSrcEltBits;
717         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
718         if (!Sub.isZero()) {
719           DemandedSrcBits |= Sub;
720           for (unsigned j = 0; j != NumElts; ++j)
721             if (DemandedElts[j])
722               DemandedSrcElts.setBit((j * Scale) + i);
723         }
724       }
725 
726       if (SDValue V = SimplifyMultipleUseDemandedBits(
727               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
728         return DAG.getBitcast(DstVT, V);
729     }
730 
731     // TODO - bigendian once we have test coverage.
732     if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
733       unsigned Scale = NumSrcEltBits / NumDstEltBits;
734       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
735       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
736       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
737       for (unsigned i = 0; i != NumElts; ++i)
738         if (DemandedElts[i]) {
739           unsigned Offset = (i % Scale) * NumDstEltBits;
740           DemandedSrcBits.insertBits(DemandedBits, Offset);
741           DemandedSrcElts.setBit(i / Scale);
742         }
743 
744       if (SDValue V = SimplifyMultipleUseDemandedBits(
745               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
746         return DAG.getBitcast(DstVT, V);
747     }
748 
749     break;
750   }
751   case ISD::FREEZE: {
752     SDValue N0 = Op.getOperand(0);
753     if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
754                                              /*PoisonOnly=*/false))
755       return N0;
756     break;
757   }
758   case ISD::AND: {
759     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761 
762     // If all of the demanded bits are known 1 on one side, return the other.
763     // These bits cannot contribute to the result of the 'and' in this
764     // context.
765     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
766       return Op.getOperand(0);
767     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
768       return Op.getOperand(1);
769     break;
770   }
771   case ISD::OR: {
772     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774 
775     // If all of the demanded bits are known zero on one side, return the
776     // other.  These bits cannot contribute to the result of the 'or' in this
777     // context.
778     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
779       return Op.getOperand(0);
780     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
781       return Op.getOperand(1);
782     break;
783   }
784   case ISD::XOR: {
785     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
786     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
787 
788     // If all of the demanded bits are known zero on one side, return the
789     // other.
790     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
791       return Op.getOperand(0);
792     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
793       return Op.getOperand(1);
794     break;
795   }
796   case ISD::SHL: {
797     // If we are only demanding sign bits then we can use the shift source
798     // directly.
799     if (std::optional<uint64_t> MaxSA =
800             DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
801       SDValue Op0 = Op.getOperand(0);
802       unsigned ShAmt = *MaxSA;
803       unsigned NumSignBits =
804           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
805       unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
806       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
807         return Op0;
808     }
809     break;
810   }
811   case ISD::SETCC: {
812     SDValue Op0 = Op.getOperand(0);
813     SDValue Op1 = Op.getOperand(1);
814     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
815     // If (1) we only need the sign-bit, (2) the setcc operands are the same
816     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
817     // -1, we may be able to bypass the setcc.
818     if (DemandedBits.isSignMask() &&
819         Op0.getScalarValueSizeInBits() == BitWidth &&
820         getBooleanContents(Op0.getValueType()) ==
821             BooleanContent::ZeroOrNegativeOneBooleanContent) {
822       // If we're testing X < 0, then this compare isn't needed - just use X!
823       // FIXME: We're limiting to integer types here, but this should also work
824       // if we don't care about FP signed-zero. The use of SETLT with FP means
825       // that we don't care about NaNs.
826       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
827           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
828         return Op0;
829     }
830     break;
831   }
832   case ISD::SIGN_EXTEND_INREG: {
833     // If none of the extended bits are demanded, eliminate the sextinreg.
834     SDValue Op0 = Op.getOperand(0);
835     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
836     unsigned ExBits = ExVT.getScalarSizeInBits();
837     if (DemandedBits.getActiveBits() <= ExBits &&
838         shouldRemoveRedundantExtend(Op))
839       return Op0;
840     // If the input is already sign extended, just drop the extension.
841     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
842     if (NumSignBits >= (BitWidth - ExBits + 1))
843       return Op0;
844     break;
845   }
846   case ISD::ANY_EXTEND_VECTOR_INREG:
847   case ISD::SIGN_EXTEND_VECTOR_INREG:
848   case ISD::ZERO_EXTEND_VECTOR_INREG: {
849     if (VT.isScalableVector())
850       return SDValue();
851 
852     // If we only want the lowest element and none of extended bits, then we can
853     // return the bitcasted source vector.
854     SDValue Src = Op.getOperand(0);
855     EVT SrcVT = Src.getValueType();
856     EVT DstVT = Op.getValueType();
857     if (IsLE && DemandedElts == 1 &&
858         DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
859         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
860       return DAG.getBitcast(DstVT, Src);
861     }
862     break;
863   }
864   case ISD::INSERT_VECTOR_ELT: {
865     if (VT.isScalableVector())
866       return SDValue();
867 
868     // If we don't demand the inserted element, return the base vector.
869     SDValue Vec = Op.getOperand(0);
870     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
871     EVT VecVT = Vec.getValueType();
872     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
873         !DemandedElts[CIdx->getZExtValue()])
874       return Vec;
875     break;
876   }
877   case ISD::INSERT_SUBVECTOR: {
878     if (VT.isScalableVector())
879       return SDValue();
880 
881     SDValue Vec = Op.getOperand(0);
882     SDValue Sub = Op.getOperand(1);
883     uint64_t Idx = Op.getConstantOperandVal(2);
884     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
885     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
886     // If we don't demand the inserted subvector, return the base vector.
887     if (DemandedSubElts == 0)
888       return Vec;
889     break;
890   }
891   case ISD::VECTOR_SHUFFLE: {
892     assert(!VT.isScalableVector());
893     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
894 
895     // If all the demanded elts are from one operand and are inline,
896     // then we can use the operand directly.
897     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
898     for (unsigned i = 0; i != NumElts; ++i) {
899       int M = ShuffleMask[i];
900       if (M < 0 || !DemandedElts[i])
901         continue;
902       AllUndef = false;
903       IdentityLHS &= (M == (int)i);
904       IdentityRHS &= ((M - NumElts) == i);
905     }
906 
907     if (AllUndef)
908       return DAG.getUNDEF(Op.getValueType());
909     if (IdentityLHS)
910       return Op.getOperand(0);
911     if (IdentityRHS)
912       return Op.getOperand(1);
913     break;
914   }
915   default:
916     // TODO: Probably okay to remove after audit; here to reduce change size
917     // in initial enablement patch for scalable vectors
918     if (VT.isScalableVector())
919       return SDValue();
920 
921     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
922       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
923               Op, DemandedBits, DemandedElts, DAG, Depth))
924         return V;
925     break;
926   }
927   return SDValue();
928 }
929 
930 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
931     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
932     unsigned Depth) const {
933   EVT VT = Op.getValueType();
934   // Since the number of lanes in a scalable vector is unknown at compile time,
935   // we track one bit which is implicitly broadcast to all lanes.  This means
936   // that all lanes in a scalable vector are considered demanded.
937   APInt DemandedElts = VT.isFixedLengthVector()
938                            ? APInt::getAllOnes(VT.getVectorNumElements())
939                            : APInt(1, 1);
940   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
941                                          Depth);
942 }
943 
944 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
945     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
946     unsigned Depth) const {
947   APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
948   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
949                                          Depth);
950 }
951 
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// The operands must have enough known sign/zero bits (or provably
// non-overflowing adds) that the add cannot wrap, otherwise the averaged
// value would differ from the shift result. Returns the replacement value on
// success, or an empty SDValue if no AVG node can be formed.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  // Match an inner add where either Op2 or Op3 is the constant 1 (the
  // avgceil rounding bias). On success, ExtOpA/ExtOpB are updated to the two
  // values being averaged and Add2 records the inner add node so its
  // overflow behaviour can be queried later.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Look for the +1 in either operand of the outer add (both commutations).
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  // NumSigned/NumZero are the sign-extension / leading-zero headroom common
  // to *both* operands; they bound how far the operation can be narrowed.
  // NumSigned is reduced by one since the add can consume one bit of
  // sign-bit headroom.
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Choose the signed or unsigned AVG flavour, recording in KnownBits how
  // many high bits are known (and therefore removable by narrowing).
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    // Prefer the unsigned form when more leading zero bits than sign bits
    // are known.
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // An srl can still use the signed form if the caller never demands the
    // sign bit of the result.
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  // Truncate (or extend) the operands into the narrowed type, emit the AVG
  // node there, then extend the result back to the original type.
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1093 
1094 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1095 /// result of Op are ever used downstream. If we can use this information to
1096 /// simplify Op, create a new simplified DAG node and return true, returning the
1097 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1098 /// return a mask of Known bits for the expression (used to simplify the
1099 /// caller).  The Known bits may only be accurate for those bits in the
1100 /// OriginalDemandedBits and OriginalDemandedElts.
1101 bool TargetLowering::SimplifyDemandedBits(
1102     SDValue Op, const APInt &OriginalDemandedBits,
1103     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1104     unsigned Depth, bool AssumeSingleUse) const {
1105   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1106   assert(Op.getScalarValueSizeInBits() == BitWidth &&
1107          "Mask size mismatches value type size!");
1108 
1109   // Don't know anything.
1110   Known = KnownBits(BitWidth);
1111 
1112   EVT VT = Op.getValueType();
1113   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1114   unsigned NumElts = OriginalDemandedElts.getBitWidth();
1115   assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1116          "Unexpected vector size");
1117 
1118   APInt DemandedBits = OriginalDemandedBits;
1119   APInt DemandedElts = OriginalDemandedElts;
1120   SDLoc dl(Op);
1121 
1122   // Undef operand.
1123   if (Op.isUndef())
1124     return false;
1125 
1126   // We can't simplify target constants.
1127   if (Op.getOpcode() == ISD::TargetConstant)
1128     return false;
1129 
1130   if (Op.getOpcode() == ISD::Constant) {
1131     // We know all of the bits for a constant!
1132     Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1133     return false;
1134   }
1135 
1136   if (Op.getOpcode() == ISD::ConstantFP) {
1137     // We know all of the bits for a floating point constant!
1138     Known = KnownBits::makeConstant(
1139         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1140     return false;
1141   }
1142 
1143   // Other users may use these bits.
1144   bool HasMultiUse = false;
1145   if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1146     if (Depth >= SelectionDAG::MaxRecursionDepth) {
1147       // Limit search depth.
1148       return false;
1149     }
1150     // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1151     DemandedBits = APInt::getAllOnes(BitWidth);
1152     DemandedElts = APInt::getAllOnes(NumElts);
1153     HasMultiUse = true;
1154   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1155     // Not demanding any bits/elts from Op.
1156     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1157   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1158     // Limit search depth.
1159     return false;
1160   }
1161 
1162   KnownBits Known2;
1163   switch (Op.getOpcode()) {
1164   case ISD::SCALAR_TO_VECTOR: {
1165     if (VT.isScalableVector())
1166       return false;
1167     if (!DemandedElts[0])
1168       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1169 
1170     KnownBits SrcKnown;
1171     SDValue Src = Op.getOperand(0);
1172     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1173     APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1174     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1175       return true;
1176 
1177     // Upper elements are undef, so only get the knownbits if we just demand
1178     // the bottom element.
1179     if (DemandedElts == 1)
1180       Known = SrcKnown.anyextOrTrunc(BitWidth);
1181     break;
1182   }
1183   case ISD::BUILD_VECTOR:
1184     // Collect the known bits that are shared by every demanded element.
1185     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1186     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1187     return false; // Don't fall through, will infinitely loop.
1188   case ISD::SPLAT_VECTOR: {
1189     SDValue Scl = Op.getOperand(0);
1190     APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1191     KnownBits KnownScl;
1192     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1193       return true;
1194 
1195     // Implicitly truncate the bits to match the official semantics of
1196     // SPLAT_VECTOR.
1197     Known = KnownScl.trunc(BitWidth);
1198     break;
1199   }
1200   case ISD::LOAD: {
1201     auto *LD = cast<LoadSDNode>(Op);
1202     if (getTargetConstantFromLoad(LD)) {
1203       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1204       return false; // Don't fall through, will infinitely loop.
1205     }
1206     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1207       // If this is a ZEXTLoad and we are looking at the loaded value.
1208       EVT MemVT = LD->getMemoryVT();
1209       unsigned MemBits = MemVT.getScalarSizeInBits();
1210       Known.Zero.setBitsFrom(MemBits);
1211       return false; // Don't fall through, will infinitely loop.
1212     }
1213     break;
1214   }
1215   case ISD::INSERT_VECTOR_ELT: {
1216     if (VT.isScalableVector())
1217       return false;
1218     SDValue Vec = Op.getOperand(0);
1219     SDValue Scl = Op.getOperand(1);
1220     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1221     EVT VecVT = Vec.getValueType();
1222 
1223     // If index isn't constant, assume we need all vector elements AND the
1224     // inserted element.
1225     APInt DemandedVecElts(DemandedElts);
1226     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1227       unsigned Idx = CIdx->getZExtValue();
1228       DemandedVecElts.clearBit(Idx);
1229 
1230       // Inserted element is not required.
1231       if (!DemandedElts[Idx])
1232         return TLO.CombineTo(Op, Vec);
1233     }
1234 
1235     KnownBits KnownScl;
1236     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1237     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1238     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1239       return true;
1240 
1241     Known = KnownScl.anyextOrTrunc(BitWidth);
1242 
1243     KnownBits KnownVec;
1244     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1245                              Depth + 1))
1246       return true;
1247 
1248     if (!!DemandedVecElts)
1249       Known = Known.intersectWith(KnownVec);
1250 
1251     return false;
1252   }
1253   case ISD::INSERT_SUBVECTOR: {
1254     if (VT.isScalableVector())
1255       return false;
1256     // Demand any elements from the subvector and the remainder from the src its
1257     // inserted into.
1258     SDValue Src = Op.getOperand(0);
1259     SDValue Sub = Op.getOperand(1);
1260     uint64_t Idx = Op.getConstantOperandVal(2);
1261     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1262     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1263     APInt DemandedSrcElts = DemandedElts;
1264     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1265 
1266     KnownBits KnownSub, KnownSrc;
1267     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1268                              Depth + 1))
1269       return true;
1270     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1271                              Depth + 1))
1272       return true;
1273 
1274     Known.Zero.setAllBits();
1275     Known.One.setAllBits();
1276     if (!!DemandedSubElts)
1277       Known = Known.intersectWith(KnownSub);
1278     if (!!DemandedSrcElts)
1279       Known = Known.intersectWith(KnownSrc);
1280 
1281     // Attempt to avoid multi-use src if we don't need anything from it.
1282     if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1283         !DemandedSrcElts.isAllOnes()) {
1284       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1285           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1286       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1287           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1288       if (NewSub || NewSrc) {
1289         NewSub = NewSub ? NewSub : Sub;
1290         NewSrc = NewSrc ? NewSrc : Src;
1291         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1292                                         Op.getOperand(2));
1293         return TLO.CombineTo(Op, NewOp);
1294       }
1295     }
1296     break;
1297   }
1298   case ISD::EXTRACT_SUBVECTOR: {
1299     if (VT.isScalableVector())
1300       return false;
1301     // Offset the demanded elts by the subvector index.
1302     SDValue Src = Op.getOperand(0);
1303     if (Src.getValueType().isScalableVector())
1304       break;
1305     uint64_t Idx = Op.getConstantOperandVal(1);
1306     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1307     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1308 
1309     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1310                              Depth + 1))
1311       return true;
1312 
1313     // Attempt to avoid multi-use src if we don't need anything from it.
1314     if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1315       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1316           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1317       if (DemandedSrc) {
1318         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1319                                         Op.getOperand(1));
1320         return TLO.CombineTo(Op, NewOp);
1321       }
1322     }
1323     break;
1324   }
1325   case ISD::CONCAT_VECTORS: {
1326     if (VT.isScalableVector())
1327       return false;
1328     Known.Zero.setAllBits();
1329     Known.One.setAllBits();
1330     EVT SubVT = Op.getOperand(0).getValueType();
1331     unsigned NumSubVecs = Op.getNumOperands();
1332     unsigned NumSubElts = SubVT.getVectorNumElements();
1333     for (unsigned i = 0; i != NumSubVecs; ++i) {
1334       APInt DemandedSubElts =
1335           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1336       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1337                                Known2, TLO, Depth + 1))
1338         return true;
1339       // Known bits are shared by every demanded subvector element.
1340       if (!!DemandedSubElts)
1341         Known = Known.intersectWith(Known2);
1342     }
1343     break;
1344   }
1345   case ISD::VECTOR_SHUFFLE: {
1346     assert(!VT.isScalableVector());
1347     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1348 
1349     // Collect demanded elements from shuffle operands..
1350     APInt DemandedLHS, DemandedRHS;
1351     if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1352                                 DemandedRHS))
1353       break;
1354 
1355     if (!!DemandedLHS || !!DemandedRHS) {
1356       SDValue Op0 = Op.getOperand(0);
1357       SDValue Op1 = Op.getOperand(1);
1358 
1359       Known.Zero.setAllBits();
1360       Known.One.setAllBits();
1361       if (!!DemandedLHS) {
1362         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1363                                  Depth + 1))
1364           return true;
1365         Known = Known.intersectWith(Known2);
1366       }
1367       if (!!DemandedRHS) {
1368         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1369                                  Depth + 1))
1370           return true;
1371         Known = Known.intersectWith(Known2);
1372       }
1373 
1374       // Attempt to avoid multi-use ops if we don't need anything from them.
1375       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1377       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1379       if (DemandedOp0 || DemandedOp1) {
1380         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1383         return TLO.CombineTo(Op, NewOp);
1384       }
1385     }
1386     break;
1387   }
1388   case ISD::AND: {
1389     SDValue Op0 = Op.getOperand(0);
1390     SDValue Op1 = Op.getOperand(1);
1391 
1392     // If the RHS is a constant, check to see if the LHS would be zero without
1393     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1394     // simplify the LHS, here we're using information from the LHS to simplify
1395     // the RHS.
1396     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1397       // Do not increment Depth here; that can cause an infinite loop.
1398       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1399       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1400       if ((LHSKnown.Zero & DemandedBits) ==
1401           (~RHSC->getAPIntValue() & DemandedBits))
1402         return TLO.CombineTo(Op, Op0);
1403 
1404       // If any of the set bits in the RHS are known zero on the LHS, shrink
1405       // the constant.
1406       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1407                                  DemandedElts, TLO))
1408         return true;
1409 
1410       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1411       // constant, but if this 'and' is only clearing bits that were just set by
1412       // the xor, then this 'and' can be eliminated by shrinking the mask of
1413       // the xor. For example, for a 32-bit X:
1414       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1415       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1416           LHSKnown.One == ~RHSC->getAPIntValue()) {
1417         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1418         return TLO.CombineTo(Op, Xor);
1419       }
1420     }
1421 
1422     // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1423     // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1424     if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1425         (Op0.getOperand(0).isUndef() ||
1426          ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1427         Op0->hasOneUse()) {
1428       unsigned NumSubElts =
1429           Op0.getOperand(1).getValueType().getVectorNumElements();
1430       unsigned SubIdx = Op0.getConstantOperandVal(2);
1431       APInt DemandedSub =
1432           APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1433       KnownBits KnownSubMask =
1434           TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1435       if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1436         SDValue NewAnd =
1437             TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1438         SDValue NewInsert =
1439             TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1440                             Op0.getOperand(1), Op0.getOperand(2));
1441         return TLO.CombineTo(Op, NewInsert);
1442       }
1443     }
1444 
1445     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1446                              Depth + 1))
1447       return true;
1448     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1449                              Known2, TLO, Depth + 1))
1450       return true;
1451 
1452     // If all of the demanded bits are known one on one side, return the other.
1453     // These bits cannot contribute to the result of the 'and'.
1454     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1455       return TLO.CombineTo(Op, Op0);
1456     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1457       return TLO.CombineTo(Op, Op1);
1458     // If all of the demanded bits in the inputs are known zeros, return zero.
1459     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1460       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1461     // If the RHS is a constant, see if we can simplify it.
1462     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1463                                TLO))
1464       return true;
1465     // If the operation can be done in a smaller type, do so.
1466     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1467       return true;
1468 
1469     // Attempt to avoid multi-use ops if we don't need anything from them.
1470     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1471       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1472           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1473       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1474           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1475       if (DemandedOp0 || DemandedOp1) {
1476         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1477         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1478         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1479         return TLO.CombineTo(Op, NewOp);
1480       }
1481     }
1482 
1483     Known &= Known2;
1484     break;
1485   }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    // Simplify the RHS first. If either operand changes, we can no longer
    // guarantee the operands are disjoint, so conservatively drop that flag.
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    // Bits already known one from the RHS cannot be affected by the LHS, so
    // they are not demanded from Op0.
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    // Result knownbits of an OR are the union of the operands' knownbits.
    Known |= Known2;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // XOR passes every demanded bit straight through from each side, so both
    // operands are simplified against the full demanded mask.
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }

      unsigned Op0Opcode = Op0.getOpcode();
      if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
        if (ConstantSDNode *ShiftC =
                isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
          // Don't crash on an oversized shift. We can not guarantee that a
          // bogus shift has been simplified to undef.
          if (ShiftC->getAPIntValue().ult(BitWidth)) {
            uint64_t ShiftAmt = ShiftC->getZExtValue();
            // Ones covers exactly the bit positions that survive the shift;
            // the commute is only valid if the xor constant matches the
            // demanded surviving bits.
            APInt Ones = APInt::getAllOnes(BitWidth);
            Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
                                         : Ones.lshr(ShiftAmt);
            if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
                isDesirableToCommuteXorWithShift(Op.getNode())) {
              // If the xor constant is a demanded mask, do a 'not' before the
              // shift:
              // xor (X << ShiftC), XorC --> (not X) << ShiftC
              // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
              SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
              return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
                                                       Op0.getOperand(1)));
            }
          }
        }
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known ^= Known2;
    break;
  }
  case ISD::SELECT:
    // Only the two value operands (1 and 2) are simplified here; the
    // condition operand (0) is left untouched.
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
                             Known, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  case ISD::VSELECT:
    // Simplify the two value operands (1 and 2); the vector condition
    // (operand 0) is not simplified here. Note: unlike SELECT, no
    // ShrinkDemandedConstant is attempted for VSELECT.
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
                             Known, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  case ISD::SELECT_CC:
    // Only the selected result operands (2 and 3) share bits with the result,
    // so only they are simplified; the compared operands are left alone.
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
                             Known, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    // For targets whose boolean results are 0/1, every bit above bit 0 of the
    // setcc result is known zero.
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // With a known constant shift amount we can propagate the demanded mask
    // through the shift exactly.
    if (std::optional<uint64_t> KnownSA =
            TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
      unsigned ShAmt = *KnownSA;
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (std::optional<uint64_t> InnerSA =
                  TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
            unsigned C1 = *InnerSA;
            // The net shift is left by ShAmt - C1; a negative difference
            // becomes a right shift instead.
            unsigned Opc = ISD::SHL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          SDValue NarrowShl = TLO.DAG.getNode(
              ISD::SHL, dl, InnerVT, InnerOp,
              TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
        if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
            InnerOp.hasOneUse()) {
          if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
                  InnerOp, DemandedElts, Depth + 2)) {
            unsigned InnerShAmt = *SA2;
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countr_zero() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      // Only the demanded bits shifted back down are demanded from Op0.
      APInt InDemandedMask = DemandedBits.lshr(ShAmt);
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1)) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Op->dropFlags(SDNodeFlags::NoWrap);
        return true;
      }
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }

      // TODO: Can we merge this fold with the one below?
      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
          Op.getNode()->hasOneUse()) {
        // Search for the smallest integer type with free casts to and from
        // Op's type. For expedience, just check power-of-2 integer types.
        unsigned DemandedSize = DemandedBits.getActiveBits();
        for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
             SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
          EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
          if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
              isTypeDesirableForOp(ISD::SHL, SmallVT) &&
              isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
              (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
            assert(DemandedSize <= SmallVTBits &&
                   "Narrowed below demanded bits?");
            // We found a type with free casts.
            SDValue NarrowShl = TLO.DAG.getNode(
                ISD::SHL, dl, SmallVT,
                TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
                TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
          }
        }
      }

      // Narrow shift to lower half - similar to ShrinkDemandedOp.
      // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
      // Only do this if we demand the upper half so the knownbits are correct.
      unsigned HalfWidth = BitWidth / 2;
      if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
          DemandedBits.countLeadingOnes() >= HalfWidth) {
        EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
        if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
            isTypeDesirableForOp(ISD::SHL, HalfVT) &&
            isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
            (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
          // If we're demanding the upper bits at all, we must ensure
          // that the upper bits of the shift result are known to be zero,
          // which is equivalent to the narrow shift being NUW.
          if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
            bool IsNSW = Known.countMinSignBits() > HalfWidth;
            SDNodeFlags Flags;
            Flags.setNoSignedWrap(IsNSW);
            Flags.setNoUnsignedWrap(IsNUW);
            SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
            SDValue NewShiftAmt =
                TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
            SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
                                               NewShiftAmt, Flags);
            SDValue NewExt =
                TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
            return TLO.CombineTo(Op, NewExt);
          }
        }
      }
    } else {
      // This is a variable shift, so we can't shift the demand mask by a known
      // amount. But if we are not demanding high bits, then we are not
      // demanding those bits from the pre-shifted operand either.
      if (unsigned CTLZ = DemandedBits.countl_zero()) {
        APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
        if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
                                 Depth + 1)) {
          // Disable the nsw and nuw flags. We can no longer guarantee that we
          // won't wrap after simplification.
          Op->dropFlags(SDNodeFlags::NoWrap);
          return true;
        }
        Known.resetAll();
      }
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<uint64_t> MaxSA =
            TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // With a known constant shift amount we can propagate the demanded mask
    // through the shift exactly.
    if (std::optional<uint64_t> KnownSA =
            TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
      unsigned ShAmt = *KnownSA;
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (std::optional<uint64_t> InnerSA =
                  TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
            unsigned C1 = *InnerSA;
            // The net shift is right by ShAmt - C1; a negative difference
            // becomes a left shift instead.
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
      // single sra. We can do this if the top bits are never demanded.
      if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (std::optional<uint64_t> InnerSA =
                  TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
            unsigned C1 = *InnerSA;
            // Clamp the combined shift amount if it exceeds the bit width.
            unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
            SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
                                                     Op0.getOperand(0), NewSA));
          }
        }
      }

      // Only the demanded bits shifted back up are demanded from Op0.
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Narrow shift to lower half - similar to ShrinkDemandedOp.
      // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
      if ((BitWidth % 2) == 0 && !VT.isVector()) {
        APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
        EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
        if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
            isTypeDesirableForOp(ISD::SRL, HalfVT) &&
            isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
            (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
            ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
             TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
          SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
          SDValue NewShiftAmt =
              TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
          SDValue NewShift =
              TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    } else {
      // Use generic knownbits computation as it has support for non-uniform
      // shift amounts.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    }

    // Try to match AVG patterns (after shift simplification).
    if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
                                        DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, AVG);

    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable.  The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOne())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    // With a known constant shift amount we can propagate the demanded mask
    // through the shift exactly.
    if (std::optional<uint64_t> KnownSA =
            TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
      unsigned ShAmt = *KnownSA;
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
      // supports sext_inreg.
      if (Op0.getOpcode() == ISD::SHL) {
        if (std::optional<uint64_t> InnerSA =
                TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
          unsigned LowBits = BitWidth - ShAmt;
          EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
          if (VT.isVector())
            ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
                                     VT.getVectorElementCount());

          if (*InnerSA == ShAmt) {
            if (!TLO.LegalOperations() ||
                getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
                                      Op0.getOperand(0),
                                      TLO.DAG.getValueType(ExtVT)));

            // Even if we can't convert to sext_inreg, we might be able to
            // remove this shift pair if the input is already sign extended.
            unsigned NumSignBits =
                TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
            if (NumSignBits > ShAmt)
              return TLO.CombineTo(Op, Op0.getOperand(0));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countl_zero() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countl_zero() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }

    // Try to match AVG patterns (after shift simplification).
    if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
                                        DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, AVG);

    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    // Only analyze further if the funnel-shift amount is a constant (or a
    // constant splat across the demanded elements).
    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      // Funnel shift amounts operate modulo the bit width.
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      // Map the demanded result bits back onto each source operand by
      // shifting the demanded mask in the opposite direction.
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      // Shift each operand's known bits into their final positions in the
      // result and combine the knowledge from both halves.
      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known = Known.unionWith(Known2);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
          !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
            Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0 || DemandedOp1) {
          DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
          DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
                                          DemandedOp1, Op2);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
                               Known2, TLO, Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    bool IsROTL = (Op.getOpcode() == ISD::ROTL);

    // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      // Rotate amounts operate modulo the bit width.
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
      unsigned RevAmt = BitWidth - Amt;

      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
      // A rotate only permutes bits, so rotate the demanded mask the other
      // way to find which source bits are needed.
      APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;

      // rot*(x, 0) --> x
      if (Amt == 0)
        return TLO.CombineTo(Op, Op0);

      // See if we don't demand either half of the rotated bits.
      // If nothing below the left-shifted half is demanded, this is a SHL.
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
          DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
      }
      // If nothing above the right-shifted half is demanded, this is a SRL.
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
          DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: {
    unsigned Opc = Op.getOpcode();
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If we're only demanding signbits, then we can simplify to OR/AND node.
    // When both operands are sign-splats over all demanded bits, smin/umax
    // behaves like OR and smax/umin like AND on those bits.
    unsigned BitOp =
        (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
    unsigned NumSignBits =
        std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
                 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
    unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
    if (NumSignBits >= NumDemandedUpperBits)
      return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));

    // Check if one arg is always less/greater than (or equal) to the other arg.
    // If the known-bits ranges prove the ordering, the min/max folds to one
    // of its operands outright.
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    switch (Opc) {
    case ISD::SMIN:
      if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
        return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
      if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
        return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
      Known = KnownBits::smin(Known0, Known1);
      break;
    case ISD::SMAX:
      if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
        return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
      if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
        return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
      Known = KnownBits::smax(Known0, Known1);
      break;
    case ISD::UMIN:
      if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
        return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
      if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
        return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
      Known = KnownBits::umin(Known0, Known1);
      break;
    case ISD::UMAX:
      if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
        return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
      if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
        return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
      Known = KnownBits::umax(Known0, Known1);
      break;
    }
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = Op.getOperand(0);
    // A bit reversal permutes bits 1:1, so the demanded source bits are
    // simply the demanded result bits reversed.
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    // Known bits of the result are the source's known bits, reversed.
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(0);

    // If the only bits demanded come from one byte of the bswap result,
    // just shift the input byte into position to eliminate the bswap.
    unsigned NLZ = DemandedBits.countl_zero();
    unsigned NTZ = DemandedBits.countr_zero();

    // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
    // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
    // have 14 leading zeros, round to 8.
    NLZ = alignDown(NLZ, 8);
    NTZ = alignDown(NTZ, 8);
    // If we need exactly one byte, we can do this transformation.
    if (BitWidth - NLZ - NTZ == 8) {
      // Replace this with either a left or right shift to get the byte into
      // the right place.
      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
      if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
        SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
        SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // Otherwise, byte-swap the demanded mask to find the demanded source
    // bits, and byte-swap the source's known bits back.
    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
                                               Op.getOperand(0)));

    // No demanded-bits simplification for the general case; just report the
    // known bits of the population count.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    // Operand 1 is a VT node describing the width being sign-extended from.
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    if (DemandedBits.isSignMask()) {
      unsigned MinSignedBits =
          TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        SDValue ShiftAmt =
            TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    // Mask of the bits that survive from the narrow input value.
    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    // BUILD_PAIR concatenates operand 1 (high half) onto operand 0 (low
    // half); split the demanded mask accordingly.
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    // Reassemble the known bits in the same hi:lo order.
    Known = KnownHi.concat(KnownLo);
    break;
  }
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::ZERO_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Only the low InBits of the demanded mask can come from the source.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1)) {
      // The source changed, so the nneg hint may no longer hold.
      Op->dropFlags(SDNodeFlags::NonNeg);
      return true;
    }
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    // The extended bits are known zero.
    Known = Known.zext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::SIGN_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    APInt InDemandedElts = DemandedElts.zext(InElts);
    APInt InDemandedBits = DemandedBits.trunc(InBits);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
      if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
          TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
              InBits) {
        unsigned Opc =
            IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
        if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
      }
    }

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
        SDNodeFlags Flags;
        // Record that the extended value is known non-negative (scalar
        // zext only).
        if (!IsVecInReg)
          Flags |= SDNodeFlags::NonNeg;
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
      }
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::ANY_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsLE && IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    // Only the low InBits can come from the source; the rest are undefined.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    // anyext: nothing is known about the extended bits.
    Known = Known.anyext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(0);

    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    // Bits above BitWidth are never demanded from the wide source.
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    switch (Src.getOpcode()) {
    default:
      break;
    case ISD::SRL:
      // Shrink SRL by a constant if none of the high bits shifted in are
      // demanded.
      if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
        // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
        // undesirable.
        break;

      if (Src.getNode()->hasOneUse()) {
        if (isTruncateFree(Src, VT) &&
            !isTruncateFree(Src.getValueType(), VT)) {
          // If truncate is only free at trunc(srl), do not turn it into
          // srl(trunc). The check is done by first check the truncate is free
          // at Src's opcode(srl), then check the truncate is not done by
          // referencing sub-register. In test, if both trunc(srl) and
          // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
          // trunc(srl)'s trunc is free, trunc(srl) is better.
          break;
        }

        // Only handle uniform, in-range constant shift amounts.
        std::optional<uint64_t> ShAmtC =
            TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
        if (!ShAmtC || *ShAmtC >= BitWidth)
          break;
        uint64_t ShVal = *ShAmtC;

        // HighBits = the wide-only bits that the narrow SRL would shift in.
        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);
        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed.  Add a truncate of the
          // shift input, then shift it.
          SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
        }
      }
      break;
    }

    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;

    // The asserted high bits are zero; keep the known-one set consistent.
    Known.Zero |= ~InMask;
    Known.One &= (~Known.Zero);
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    // The per-element mask logic below needs a fixed element count.
    if (SrcEltCnt.isScalable())
      return false;

    // Demand the bits from every vector element without a constant index.
    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known = Known2;
    // If the scalar result is wider than the element, the upper bits are
    // anyext:ed and therefore unknown.
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return false;
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place.  We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
      // Wide result elements: each result element covers Scale source elts.
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        // Endianness decides which source element holds which bit range.
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
      // TODO - bigendian once we have test coverage.
      // Narrow result elements: Scale result elements pack into one source
      // element.
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
        if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
                Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
          SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }

    // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
2775   case ISD::MUL:
2776     if (DemandedBits.isPowerOf2()) {
2777       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2778       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2779       // odd (has LSB set), then the left-shifted low bit of X is the answer.
2780       unsigned CTZ = DemandedBits.countr_zero();
2781       ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2782       if (C && C->getAPIntValue().countr_zero() == CTZ) {
2783         SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2784         SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2785         return TLO.CombineTo(Op, Shl);
2786       }
2787     }
2788     // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2789     // X * X is odd iff X is odd.
2790     // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2791     if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2792       SDValue One = TLO.DAG.getConstant(1, dl, VT);
2793       SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2794       return TLO.CombineTo(Op, And1);
2795     }
2796     [[fallthrough]];
2797   case ISD::ADD:
2798   case ISD::SUB: {
2799     // Add, Sub, and Mul don't demand any bits in positions beyond that
2800     // of the highest bit demanded of them.
2801     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2802     SDNodeFlags Flags = Op.getNode()->getFlags();
2803     unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2804     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2805     KnownBits KnownOp0, KnownOp1;
2806     auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2807                                       const KnownBits &KnownRHS) {
2808       if (Op.getOpcode() == ISD::MUL)
2809         Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2810       return Demanded;
2811     };
2812     if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2813                              Depth + 1) ||
2814         SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2815                              DemandedElts, KnownOp0, TLO, Depth + 1) ||
2816         // See if the operation should be performed at a smaller bit width.
2817         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2818       // Disable the nsw and nuw flags. We can no longer guarantee that we
2819       // won't wrap after simplification.
2820       Op->dropFlags(SDNodeFlags::NoWrap);
2821       return true;
2822     }
2823 
2824     // neg x with only low bit demanded is simply x.
2825     if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2826         isNullConstant(Op0))
2827       return TLO.CombineTo(Op, Op1);
2828 
2829     // Attempt to avoid multi-use ops if we don't need anything from them.
2830     if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2831       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2832           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2833       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2834           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2835       if (DemandedOp0 || DemandedOp1) {
2836         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2837         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2838         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2839                                         Flags & ~SDNodeFlags::NoWrap);
2840         return TLO.CombineTo(Op, NewOp);
2841       }
2842     }
2843 
2844     // If we have a constant operand, we may be able to turn it into -1 if we
2845     // do not demand the high bits. This can make the constant smaller to
2846     // encode, allow more general folding, or match specialized instruction
2847     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2848     // is probably not useful (and could be detrimental).
2849     ConstantSDNode *C = isConstOrConstSplat(Op1);
2850     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2851     if (C && !C->isAllOnes() && !C->isOne() &&
2852         (C->getAPIntValue() | HighMask).isAllOnes()) {
2853       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2854       // Disable the nsw and nuw flags. We can no longer guarantee that we
2855       // won't wrap after simplification.
2856       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2857                                       Flags & ~SDNodeFlags::NoWrap);
2858       return TLO.CombineTo(Op, NewOp);
2859     }
2860 
2861     // Match a multiply with a disguised negated-power-of-2 and convert to a
2862     // an equivalent shift-left amount.
2863     // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2864     auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2865       if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2866         return 0;
2867 
2868       // Don't touch opaque constants. Also, ignore zero and power-of-2
2869       // multiplies. Those will get folded later.
2870       ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2871       if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2872           !MulC->getAPIntValue().isPowerOf2()) {
2873         APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2874         if (UnmaskedC.isNegatedPowerOf2())
2875           return (-UnmaskedC).logBase2();
2876       }
2877       return 0;
2878     };
2879 
2880     auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2881                        unsigned ShlAmt) {
2882       SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2883       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2884       SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2885       return TLO.CombineTo(Op, Res);
2886     };
2887 
2888     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2889       if (Op.getOpcode() == ISD::ADD) {
2890         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2891         if (unsigned ShAmt = getShiftLeftAmt(Op0))
2892           return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2893         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2894         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2895           return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2896       }
2897       if (Op.getOpcode() == ISD::SUB) {
2898         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2899         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2900           return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2901       }
2902     }
2903 
2904     if (Op.getOpcode() == ISD::MUL) {
2905       Known = KnownBits::mul(KnownOp0, KnownOp1);
2906     } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2907       Known = KnownBits::computeForAddSub(
2908           Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2909           Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2910     }
2911     break;
2912   }
2913   default:
2914     // We also ask the target about intrinsics (which could be specific to it).
2915     if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2916         Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2917       // TODO: Probably okay to remove after audit; here to reduce change size
2918       // in initial enablement patch for scalable vectors
2919       if (Op.getValueType().isScalableVector())
2920         break;
2921       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2922                                             Known, TLO, Depth))
2923         return true;
2924       break;
2925     }
2926 
2927     // Just use computeKnownBits to compute output bits.
2928     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2929     break;
2930   }
2931 
2932   // If we know the value of all of the demanded bits, return this as a
2933   // constant.
2934   if (!isTargetCanonicalConstantNode(Op) &&
2935       DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2936     // Avoid folding to a constant if any OpaqueConstant is involved.
2937     const SDNode *N = Op.getNode();
2938     for (SDNode *Op :
2939          llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2940       if (auto *C = dyn_cast<ConstantSDNode>(Op))
2941         if (C->isOpaque())
2942           return false;
2943     }
2944     if (VT.isInteger())
2945       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2946     if (VT.isFloatingPoint())
2947       return TLO.CombineTo(
2948           Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2949                                     dl, VT));
2950   }
2951 
2952   // A multi use 'all demanded elts' simplify failed to find any knownbits.
2953   // Try again just for the original demanded elts.
2954   // Ensure we do this AFTER constant folding above.
2955   if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2956     Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2957 
2958   return false;
2959 }
2960 
2961 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2962                                                 const APInt &DemandedElts,
2963                                                 DAGCombinerInfo &DCI) const {
2964   SelectionDAG &DAG = DCI.DAG;
2965   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2966                         !DCI.isBeforeLegalizeOps());
2967 
2968   APInt KnownUndef, KnownZero;
2969   bool Simplified =
2970       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2971   if (Simplified) {
2972     DCI.AddToWorklist(Op.getNode());
2973     DCI.CommitTargetLoweringOpt(TLO);
2974   }
2975 
2976   return Simplified;
2977 }
2978 
/// Given a vector binary operation and known undefined elements for each input
/// operand, compute whether each element of the output is undefined.
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
                                         const APInt &UndefOp0,
                                         const APInt &UndefOp1) {
  EVT VT = BO.getValueType();
  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
         "Vector binop only");

  EVT EltVT = VT.getVectorElementType();
  // Scalable vectors are modeled as a single element here; only fixed-length
  // vectors get true per-element analysis.
  unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
  assert(UndefOp0.getBitWidth() == NumElts &&
         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");

  // Returns the scalar for element \p Index of operand \p V when it is
  // known-undef (per \p UndefVals) or a foldable build-vector constant;
  // returns an empty SDValue otherwise.
  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
                                   const APInt &UndefVals) {
    if (UndefVals[Index])
      return DAG.getUNDEF(EltVT);

    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
      // Try hard to make sure that the getNode() call is not creating temporary
      // nodes. Ignore opaque integers because they do not constant fold.
      SDValue Elt = BV->getOperand(Index);
      auto *C = dyn_cast<ConstantSDNode>(Elt);
      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
        return Elt;
    }

    return SDValue();
  };

  APInt KnownUndef = APInt::getZero(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    // If both inputs for this element are either constant or undef and match
    // the element type, compute the constant/undef result for this element of
    // the vector.
    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
    // not handle FP constants. The code within getNode() should be refactored
    // to avoid the danger of creating a bogus temporary node here.
    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
    // getNode() constant-folds the scalar op; an undef fold result marks the
    // corresponding output element as known-undef.
    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
        KnownUndef.setBit(i);
  }
  return KnownUndef;
}
3026 
3027 bool TargetLowering::SimplifyDemandedVectorElts(
3028     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3029     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3030     bool AssumeSingleUse) const {
3031   EVT VT = Op.getValueType();
3032   unsigned Opcode = Op.getOpcode();
3033   APInt DemandedElts = OriginalDemandedElts;
3034   unsigned NumElts = DemandedElts.getBitWidth();
3035   assert(VT.isVector() && "Expected vector op");
3036 
3037   KnownUndef = KnownZero = APInt::getZero(NumElts);
3038 
3039   if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3040     return false;
3041 
3042   // TODO: For now we assume we know nothing about scalable vectors.
3043   if (VT.isScalableVector())
3044     return false;
3045 
3046   assert(VT.getVectorNumElements() == NumElts &&
3047          "Mask size mismatches value type element count!");
3048 
3049   // Undef operand.
3050   if (Op.isUndef()) {
3051     KnownUndef.setAllBits();
3052     return false;
3053   }
3054 
3055   // If Op has other users, assume that all elements are needed.
3056   if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3057     DemandedElts.setAllBits();
3058 
3059   // Not demanding any elements from Op.
3060   if (DemandedElts == 0) {
3061     KnownUndef.setAllBits();
3062     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3063   }
3064 
3065   // Limit search depth.
3066   if (Depth >= SelectionDAG::MaxRecursionDepth)
3067     return false;
3068 
3069   SDLoc DL(Op);
3070   unsigned EltSizeInBits = VT.getScalarSizeInBits();
3071   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3072 
3073   // Helper for demanding the specified elements and all the bits of both binary
3074   // operands.
3075   auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3076     SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3077                                                            TLO.DAG, Depth + 1);
3078     SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3079                                                            TLO.DAG, Depth + 1);
3080     if (NewOp0 || NewOp1) {
3081       SDValue NewOp =
3082           TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3083                           NewOp1 ? NewOp1 : Op1, Op->getFlags());
3084       return TLO.CombineTo(Op, NewOp);
3085     }
3086     return false;
3087   };
3088 
3089   switch (Opcode) {
3090   case ISD::SCALAR_TO_VECTOR: {
3091     if (!DemandedElts[0]) {
3092       KnownUndef.setAllBits();
3093       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3094     }
3095     SDValue ScalarSrc = Op.getOperand(0);
3096     if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3097       SDValue Src = ScalarSrc.getOperand(0);
3098       SDValue Idx = ScalarSrc.getOperand(1);
3099       EVT SrcVT = Src.getValueType();
3100 
3101       ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3102 
3103       if (SrcEltCnt.isScalable())
3104         return false;
3105 
3106       unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3107       if (isNullConstant(Idx)) {
3108         APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3109         APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3110         APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3111         if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3112                                        TLO, Depth + 1))
3113           return true;
3114       }
3115     }
3116     KnownUndef.setHighBits(NumElts - 1);
3117     break;
3118   }
3119   case ISD::BITCAST: {
3120     SDValue Src = Op.getOperand(0);
3121     EVT SrcVT = Src.getValueType();
3122 
3123     // We only handle vectors here.
3124     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3125     if (!SrcVT.isVector())
3126       break;
3127 
3128     // Fast handling of 'identity' bitcasts.
3129     unsigned NumSrcElts = SrcVT.getVectorNumElements();
3130     if (NumSrcElts == NumElts)
3131       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3132                                         KnownZero, TLO, Depth + 1);
3133 
3134     APInt SrcDemandedElts, SrcZero, SrcUndef;
3135 
3136     // Bitcast from 'large element' src vector to 'small element' vector, we
3137     // must demand a source element if any DemandedElt maps to it.
3138     if ((NumElts % NumSrcElts) == 0) {
3139       unsigned Scale = NumElts / NumSrcElts;
3140       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3141       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3142                                      TLO, Depth + 1))
3143         return true;
3144 
3145       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3146       // of the large element.
3147       // TODO - bigendian once we have test coverage.
3148       if (IsLE) {
3149         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3150         APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3151         for (unsigned i = 0; i != NumElts; ++i)
3152           if (DemandedElts[i]) {
3153             unsigned Ofs = (i % Scale) * EltSizeInBits;
3154             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3155           }
3156 
3157         KnownBits Known;
3158         if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3159                                  TLO, Depth + 1))
3160           return true;
3161 
3162         // The bitcast has split each wide element into a number of
3163         // narrow subelements. We have just computed the Known bits
3164         // for wide elements. See if element splitting results in
3165         // some subelements being zero. Only for demanded elements!
3166         for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3167           if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3168                    .isAllOnes())
3169             continue;
3170           for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3171             unsigned Elt = Scale * SrcElt + SubElt;
3172             if (DemandedElts[Elt])
3173               KnownZero.setBit(Elt);
3174           }
3175         }
3176       }
3177 
3178       // If the src element is zero/undef then all the output elements will be -
3179       // only demanded elements are guaranteed to be correct.
3180       for (unsigned i = 0; i != NumSrcElts; ++i) {
3181         if (SrcDemandedElts[i]) {
3182           if (SrcZero[i])
3183             KnownZero.setBits(i * Scale, (i + 1) * Scale);
3184           if (SrcUndef[i])
3185             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3186         }
3187       }
3188     }
3189 
3190     // Bitcast from 'small element' src vector to 'large element' vector, we
3191     // demand all smaller source elements covered by the larger demanded element
3192     // of this vector.
3193     if ((NumSrcElts % NumElts) == 0) {
3194       unsigned Scale = NumSrcElts / NumElts;
3195       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3196       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3197                                      TLO, Depth + 1))
3198         return true;
3199 
3200       // If all the src elements covering an output element are zero/undef, then
3201       // the output element will be as well, assuming it was demanded.
3202       for (unsigned i = 0; i != NumElts; ++i) {
3203         if (DemandedElts[i]) {
3204           if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3205             KnownZero.setBit(i);
3206           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3207             KnownUndef.setBit(i);
3208         }
3209       }
3210     }
3211     break;
3212   }
3213   case ISD::FREEZE: {
3214     SDValue N0 = Op.getOperand(0);
3215     if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3216                                                  /*PoisonOnly=*/false))
3217       return TLO.CombineTo(Op, N0);
3218 
3219     // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3220     // freeze(op(x, ...)) -> op(freeze(x), ...).
3221     if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3222       return TLO.CombineTo(
3223           Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3224                               TLO.DAG.getFreeze(N0.getOperand(0))));
3225     break;
3226   }
3227   case ISD::BUILD_VECTOR: {
3228     // Check all elements and simplify any unused elements with UNDEF.
3229     if (!DemandedElts.isAllOnes()) {
3230       // Don't simplify BROADCASTS.
3231       if (llvm::any_of(Op->op_values(),
3232                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3233         SmallVector<SDValue, 32> Ops(Op->ops());
3234         bool Updated = false;
3235         for (unsigned i = 0; i != NumElts; ++i) {
3236           if (!DemandedElts[i] && !Ops[i].isUndef()) {
3237             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3238             KnownUndef.setBit(i);
3239             Updated = true;
3240           }
3241         }
3242         if (Updated)
3243           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3244       }
3245     }
3246     for (unsigned i = 0; i != NumElts; ++i) {
3247       SDValue SrcOp = Op.getOperand(i);
3248       if (SrcOp.isUndef()) {
3249         KnownUndef.setBit(i);
3250       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3251                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3252         KnownZero.setBit(i);
3253       }
3254     }
3255     break;
3256   }
3257   case ISD::CONCAT_VECTORS: {
3258     EVT SubVT = Op.getOperand(0).getValueType();
3259     unsigned NumSubVecs = Op.getNumOperands();
3260     unsigned NumSubElts = SubVT.getVectorNumElements();
3261     for (unsigned i = 0; i != NumSubVecs; ++i) {
3262       SDValue SubOp = Op.getOperand(i);
3263       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3264       APInt SubUndef, SubZero;
3265       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3266                                      Depth + 1))
3267         return true;
3268       KnownUndef.insertBits(SubUndef, i * NumSubElts);
3269       KnownZero.insertBits(SubZero, i * NumSubElts);
3270     }
3271 
3272     // Attempt to avoid multi-use ops if we don't need anything from them.
3273     if (!DemandedElts.isAllOnes()) {
3274       bool FoundNewSub = false;
3275       SmallVector<SDValue, 2> DemandedSubOps;
3276       for (unsigned i = 0; i != NumSubVecs; ++i) {
3277         SDValue SubOp = Op.getOperand(i);
3278         APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3279         SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3280             SubOp, SubElts, TLO.DAG, Depth + 1);
3281         DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3282         FoundNewSub = NewSubOp ? true : FoundNewSub;
3283       }
3284       if (FoundNewSub) {
3285         SDValue NewOp =
3286             TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3287         return TLO.CombineTo(Op, NewOp);
3288       }
3289     }
3290     break;
3291   }
3292   case ISD::INSERT_SUBVECTOR: {
3293     // Demand any elements from the subvector and the remainder from the src its
3294     // inserted into.
3295     SDValue Src = Op.getOperand(0);
3296     SDValue Sub = Op.getOperand(1);
3297     uint64_t Idx = Op.getConstantOperandVal(2);
3298     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3299     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3300     APInt DemandedSrcElts = DemandedElts;
3301     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3302 
3303     APInt SubUndef, SubZero;
3304     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3305                                    Depth + 1))
3306       return true;
3307 
3308     // If none of the src operand elements are demanded, replace it with undef.
3309     if (!DemandedSrcElts && !Src.isUndef())
3310       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3311                                                TLO.DAG.getUNDEF(VT), Sub,
3312                                                Op.getOperand(2)));
3313 
3314     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3315                                    TLO, Depth + 1))
3316       return true;
3317     KnownUndef.insertBits(SubUndef, Idx);
3318     KnownZero.insertBits(SubZero, Idx);
3319 
3320     // Attempt to avoid multi-use ops if we don't need anything from them.
3321     if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3322       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3323           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3324       SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3325           Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3326       if (NewSrc || NewSub) {
3327         NewSrc = NewSrc ? NewSrc : Src;
3328         NewSub = NewSub ? NewSub : Sub;
3329         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3330                                         NewSub, Op.getOperand(2));
3331         return TLO.CombineTo(Op, NewOp);
3332       }
3333     }
3334     break;
3335   }
3336   case ISD::EXTRACT_SUBVECTOR: {
3337     // Offset the demanded elts by the subvector index.
3338     SDValue Src = Op.getOperand(0);
3339     if (Src.getValueType().isScalableVector())
3340       break;
3341     uint64_t Idx = Op.getConstantOperandVal(1);
3342     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3343     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3344 
3345     APInt SrcUndef, SrcZero;
3346     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3347                                    Depth + 1))
3348       return true;
3349     KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3350     KnownZero = SrcZero.extractBits(NumElts, Idx);
3351 
3352     // Attempt to avoid multi-use ops if we don't need anything from them.
3353     if (!DemandedElts.isAllOnes()) {
3354       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3355           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3356       if (NewSrc) {
3357         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3358                                         Op.getOperand(1));
3359         return TLO.CombineTo(Op, NewOp);
3360       }
3361     }
3362     break;
3363   }
3364   case ISD::INSERT_VECTOR_ELT: {
3365     SDValue Vec = Op.getOperand(0);
3366     SDValue Scl = Op.getOperand(1);
3367     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3368 
3369     // For a legal, constant insertion index, if we don't need this insertion
3370     // then strip it, else remove it from the demanded elts.
3371     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3372       unsigned Idx = CIdx->getZExtValue();
3373       if (!DemandedElts[Idx])
3374         return TLO.CombineTo(Op, Vec);
3375 
3376       APInt DemandedVecElts(DemandedElts);
3377       DemandedVecElts.clearBit(Idx);
3378       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3379                                      KnownZero, TLO, Depth + 1))
3380         return true;
3381 
3382       KnownUndef.setBitVal(Idx, Scl.isUndef());
3383 
3384       KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3385       break;
3386     }
3387 
3388     APInt VecUndef, VecZero;
3389     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3390                                    Depth + 1))
3391       return true;
3392     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3393     break;
3394   }
3395   case ISD::VSELECT: {
3396     SDValue Sel = Op.getOperand(0);
3397     SDValue LHS = Op.getOperand(1);
3398     SDValue RHS = Op.getOperand(2);
3399 
3400     // Try to transform the select condition based on the current demanded
3401     // elements.
3402     APInt UndefSel, ZeroSel;
3403     if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3404                                    Depth + 1))
3405       return true;
3406 
3407     // See if we can simplify either vselect operand.
3408     APInt DemandedLHS(DemandedElts);
3409     APInt DemandedRHS(DemandedElts);
3410     APInt UndefLHS, ZeroLHS;
3411     APInt UndefRHS, ZeroRHS;
3412     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3413                                    Depth + 1))
3414       return true;
3415     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3416                                    Depth + 1))
3417       return true;
3418 
3419     KnownUndef = UndefLHS & UndefRHS;
3420     KnownZero = ZeroLHS & ZeroRHS;
3421 
3422     // If we know that the selected element is always zero, we don't need the
3423     // select value element.
3424     APInt DemandedSel = DemandedElts & ~KnownZero;
3425     if (DemandedSel != DemandedElts)
3426       if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3427                                      Depth + 1))
3428         return true;
3429 
3430     break;
3431   }
3432   case ISD::VECTOR_SHUFFLE: {
3433     SDValue LHS = Op.getOperand(0);
3434     SDValue RHS = Op.getOperand(1);
3435     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3436 
3437     // Collect demanded elements from shuffle operands..
3438     APInt DemandedLHS(NumElts, 0);
3439     APInt DemandedRHS(NumElts, 0);
3440     for (unsigned i = 0; i != NumElts; ++i) {
3441       int M = ShuffleMask[i];
3442       if (M < 0 || !DemandedElts[i])
3443         continue;
3444       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3445       if (M < (int)NumElts)
3446         DemandedLHS.setBit(M);
3447       else
3448         DemandedRHS.setBit(M - NumElts);
3449     }
3450 
3451     // See if we can simplify either shuffle operand.
3452     APInt UndefLHS, ZeroLHS;
3453     APInt UndefRHS, ZeroRHS;
3454     if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3455                                    Depth + 1))
3456       return true;
3457     if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3458                                    Depth + 1))
3459       return true;
3460 
3461     // Simplify mask using undef elements from LHS/RHS.
3462     bool Updated = false;
3463     bool IdentityLHS = true, IdentityRHS = true;
3464     SmallVector<int, 32> NewMask(ShuffleMask);
3465     for (unsigned i = 0; i != NumElts; ++i) {
3466       int &M = NewMask[i];
3467       if (M < 0)
3468         continue;
3469       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3470           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3471         Updated = true;
3472         M = -1;
3473       }
3474       IdentityLHS &= (M < 0) || (M == (int)i);
3475       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3476     }
3477 
3478     // Update legal shuffle masks based on demanded elements if it won't reduce
3479     // to Identity which can cause premature removal of the shuffle mask.
3480     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3481       SDValue LegalShuffle =
3482           buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3483       if (LegalShuffle)
3484         return TLO.CombineTo(Op, LegalShuffle);
3485     }
3486 
3487     // Propagate undef/zero elements from LHS/RHS.
3488     for (unsigned i = 0; i != NumElts; ++i) {
3489       int M = ShuffleMask[i];
3490       if (M < 0) {
3491         KnownUndef.setBit(i);
3492       } else if (M < (int)NumElts) {
3493         if (UndefLHS[M])
3494           KnownUndef.setBit(i);
3495         if (ZeroLHS[M])
3496           KnownZero.setBit(i);
3497       } else {
3498         if (UndefRHS[M - NumElts])
3499           KnownUndef.setBit(i);
3500         if (ZeroRHS[M - NumElts])
3501           KnownZero.setBit(i);
3502       }
3503     }
3504     break;
3505   }
3506   case ISD::ANY_EXTEND_VECTOR_INREG:
3507   case ISD::SIGN_EXTEND_VECTOR_INREG:
3508   case ISD::ZERO_EXTEND_VECTOR_INREG: {
3509     APInt SrcUndef, SrcZero;
3510     SDValue Src = Op.getOperand(0);
3511     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3512     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3513     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3514                                    Depth + 1))
3515       return true;
3516     KnownZero = SrcZero.zextOrTrunc(NumElts);
3517     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3518 
3519     if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3520         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3521         DemandedSrcElts == 1) {
3522       // aext - if we just need the bottom element then we can bitcast.
3523       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3524     }
3525 
3526     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3527       // zext(undef) upper bits are guaranteed to be zero.
3528       if (DemandedElts.isSubsetOf(KnownUndef))
3529         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3530       KnownUndef.clearAllBits();
3531 
3532       // zext - if we just need the bottom element then we can mask:
3533       // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3534       if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3535           Op->isOnlyUserOf(Src.getNode()) &&
3536           Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3537         SDLoc DL(Op);
3538         EVT SrcVT = Src.getValueType();
3539         EVT SrcSVT = SrcVT.getScalarType();
3540         SmallVector<SDValue> MaskElts;
3541         MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3542         MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3543         SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3544         if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3545                 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3546           Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3547           return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3548         }
3549       }
3550     }
3551     break;
3552   }
3553 
3554   // TODO: There are more binop opcodes that could be handled here - MIN,
3555   // MAX, saturated math, etc.
3556   case ISD::ADD: {
3557     SDValue Op0 = Op.getOperand(0);
3558     SDValue Op1 = Op.getOperand(1);
3559     if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3560       APInt UndefLHS, ZeroLHS;
3561       if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3562                                      Depth + 1, /*AssumeSingleUse*/ true))
3563         return true;
3564     }
3565     [[fallthrough]];
3566   }
3567   case ISD::AVGCEILS:
3568   case ISD::AVGCEILU:
3569   case ISD::AVGFLOORS:
3570   case ISD::AVGFLOORU:
3571   case ISD::OR:
3572   case ISD::XOR:
3573   case ISD::SUB:
3574   case ISD::FADD:
3575   case ISD::FSUB:
3576   case ISD::FMUL:
3577   case ISD::FDIV:
3578   case ISD::FREM: {
3579     SDValue Op0 = Op.getOperand(0);
3580     SDValue Op1 = Op.getOperand(1);
3581 
3582     APInt UndefRHS, ZeroRHS;
3583     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3584                                    Depth + 1))
3585       return true;
3586     APInt UndefLHS, ZeroLHS;
3587     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3588                                    Depth + 1))
3589       return true;
3590 
3591     KnownZero = ZeroLHS & ZeroRHS;
3592     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3593 
3594     // Attempt to avoid multi-use ops if we don't need anything from them.
3595     // TODO - use KnownUndef to relax the demandedelts?
3596     if (!DemandedElts.isAllOnes())
3597       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3598         return true;
3599     break;
3600   }
3601   case ISD::SHL:
3602   case ISD::SRL:
3603   case ISD::SRA:
3604   case ISD::ROTL:
3605   case ISD::ROTR: {
3606     SDValue Op0 = Op.getOperand(0);
3607     SDValue Op1 = Op.getOperand(1);
3608 
3609     APInt UndefRHS, ZeroRHS;
3610     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3611                                    Depth + 1))
3612       return true;
3613     APInt UndefLHS, ZeroLHS;
3614     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3615                                    Depth + 1))
3616       return true;
3617 
3618     KnownZero = ZeroLHS;
3619     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3620 
3621     // Attempt to avoid multi-use ops if we don't need anything from them.
3622     // TODO - use KnownUndef to relax the demandedelts?
3623     if (!DemandedElts.isAllOnes())
3624       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3625         return true;
3626     break;
3627   }
3628   case ISD::MUL:
3629   case ISD::MULHU:
3630   case ISD::MULHS:
3631   case ISD::AND: {
3632     SDValue Op0 = Op.getOperand(0);
3633     SDValue Op1 = Op.getOperand(1);
3634 
3635     APInt SrcUndef, SrcZero;
3636     if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3637                                    Depth + 1))
3638       return true;
3639     // If we know that a demanded element was zero in Op1 we don't need to
3640     // demand it in Op0 - its guaranteed to be zero.
3641     APInt DemandedElts0 = DemandedElts & ~SrcZero;
3642     if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3643                                    TLO, Depth + 1))
3644       return true;
3645 
3646     KnownUndef &= DemandedElts0;
3647     KnownZero &= DemandedElts0;
3648 
3649     // If every element pair has a zero/undef then just fold to zero.
3650     // fold (and x, undef) -> 0  /  (and x, 0) -> 0
3651     // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
3652     if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3653       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3654 
3655     // If either side has a zero element, then the result element is zero, even
3656     // if the other is an UNDEF.
3657     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3658     // and then handle 'and' nodes with the rest of the binop opcodes.
3659     KnownZero |= SrcZero;
3660     KnownUndef &= SrcUndef;
3661     KnownUndef &= ~KnownZero;
3662 
3663     // Attempt to avoid multi-use ops if we don't need anything from them.
3664     if (!DemandedElts.isAllOnes())
3665       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3666         return true;
3667     break;
3668   }
3669   case ISD::TRUNCATE:
3670   case ISD::SIGN_EXTEND:
3671   case ISD::ZERO_EXTEND:
3672     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3673                                    KnownZero, TLO, Depth + 1))
3674       return true;
3675 
3676     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3677       // zext(undef) upper bits are guaranteed to be zero.
3678       if (DemandedElts.isSubsetOf(KnownUndef))
3679         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3680       KnownUndef.clearAllBits();
3681     }
3682     break;
3683   default: {
3684     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3685       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3686                                                   KnownZero, TLO, Depth))
3687         return true;
3688     } else {
3689       KnownBits Known;
3690       APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3691       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3692                                TLO, Depth, AssumeSingleUse))
3693         return true;
3694     }
3695     break;
3696   }
3697   }
3698   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3699 
3700   // Constant fold all undef cases.
3701   // TODO: Handle zero cases as well.
3702   if (DemandedElts.isSubsetOf(KnownUndef))
3703     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3704 
3705   return false;
3706 }
3707 
3708 /// Determine which of the bits specified in Mask are known to be either zero or
3709 /// one and return them in the Known.
3710 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3711                                                    KnownBits &Known,
3712                                                    const APInt &DemandedElts,
3713                                                    const SelectionDAG &DAG,
3714                                                    unsigned Depth) const {
3715   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3716           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3717           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3718           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3719          "Should use MaskedValueIsZero if you don't know whether Op"
3720          " is a target node!");
3721   Known.resetAll();
3722 }
3723 
/// GlobalISel counterpart of computeKnownBitsForTargetNode. The default
/// implementation is maximally conservative and reports that no bits of
/// register \p R are known; targets override this to provide more detail.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
3730 
3731 void TargetLowering::computeKnownBitsForFrameIndex(
3732   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3733   // The low bits are known zero if the pointer is aligned.
3734   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3735 }
3736 
/// GlobalISel hook for target-specific alignment knowledge about register
/// \p R. The default implementation claims no alignment beyond 1 byte;
/// targets override this when they can prove better alignment.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  return Align(1);
}
3742 
3743 /// This method can be implemented by targets that want to expose additional
3744 /// information about sign bits to the DAG Combiner.
3745 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3746                                                          const APInt &,
3747                                                          const SelectionDAG &,
3748                                                          unsigned Depth) const {
3749   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3750           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3751           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3752           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3753          "Should use ComputeNumSignBits if you don't know whether Op"
3754          " is a target node!");
3755   return 1;
3756 }
3757 
/// GlobalISel counterpart of ComputeNumSignBitsForTargetNode. The default
/// implementation conservatively reports a single known sign bit for
/// register \p R.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
3763 
3764 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3765     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3766     TargetLoweringOpt &TLO, unsigned Depth) const {
3767   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3768           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3769           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3770           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3771          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3772          " is a target node!");
3773   return false;
3774 }
3775 
3776 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3777     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3778     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3779   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3780           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3781           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3782           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3783          "Should use SimplifyDemandedBits if you don't know whether Op"
3784          " is a target node!");
3785   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3786   return false;
3787 }
3788 
3789 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3790     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3791     SelectionDAG &DAG, unsigned Depth) const {
3792   assert(
3793       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3794        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3795        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3796        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3797       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3798       " is a target node!");
3799   return SDValue();
3800 }
3801 
3802 SDValue
3803 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3804                                         SDValue N1, MutableArrayRef<int> Mask,
3805                                         SelectionDAG &DAG) const {
3806   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3807   if (!LegalMask) {
3808     std::swap(N0, N1);
3809     ShuffleVectorSDNode::commuteMask(Mask);
3810     LegalMask = isShuffleMaskLegal(Mask, VT);
3811   }
3812 
3813   if (!LegalMask)
3814     return SDValue();
3815 
3816   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3817 }
3818 
/// Return the constant this load reads from a target-recognized constant
/// pool, if any. The default implementation recognizes nothing.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3822 
3823 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3824     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3825     bool PoisonOnly, unsigned Depth) const {
3826   assert(
3827       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3828        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3829        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3830        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3831       "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3832       " is a target node!");
3833 
3834   // If Op can't create undef/poison and none of its operands are undef/poison
3835   // then Op is never undef/poison.
3836   return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3837                                               /*ConsiderFlags*/ true, Depth) &&
3838          all_of(Op->ops(), [&](SDValue V) {
3839            return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3840                                                        Depth + 1);
3841          });
3842 }
3843 
/// Default query for whether a target node can create undef/poison. Targets
/// override this for nodes they know are undef/poison-free; the base
/// implementation must assume the worst.
bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use canCreateUndefOrPoison if you don't know whether Op"
         " is a target node!");
  // Be conservative and return true.
  return true;
}
3856 
/// Default query for whether a target node is known never to be NaN (or,
/// when \p SNaN is set, never a signaling NaN). The base implementation
/// conservatively answers "don't know" (false).
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
                                                  unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isKnownNeverNaN if you don't know whether Op"
         " is a target node!");
  return false;
}
3869 
/// Default query for whether a target node produces a splat of a single
/// value across the demanded elements. The base implementation cannot prove
/// a splat and returns false without touching \p UndefElts.
bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &UndefElts,
                                               const SelectionDAG &DAG,
                                               unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isSplatValue if you don't know whether Op"
         " is a target node!");
  return false;
}
3883 
3884 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3885 // work with truncating build vectors and vectors with elements of less than
3886 // 8 bits.
3887 bool TargetLowering::isConstTrueVal(SDValue N) const {
3888   if (!N)
3889     return false;
3890 
3891   unsigned EltWidth;
3892   APInt CVal;
3893   if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3894                                                /*AllowTruncation=*/true)) {
3895     CVal = CN->getAPIntValue();
3896     EltWidth = N.getValueType().getScalarSizeInBits();
3897   } else
3898     return false;
3899 
3900   // If this is a truncating splat, truncate the splat value.
3901   // Otherwise, we may fail to match the expected values below.
3902   if (EltWidth < CVal.getBitWidth())
3903     CVal = CVal.trunc(EltWidth);
3904 
3905   switch (getBooleanContents(N.getValueType())) {
3906   case UndefinedBooleanContent:
3907     return CVal[0];
3908   case ZeroOrOneBooleanContent:
3909     return CVal.isOne();
3910   case ZeroOrNegativeOneBooleanContent:
3911     return CVal.isAllOnes();
3912   }
3913 
3914   llvm_unreachable("Invalid boolean contents");
3915 }
3916 
3917 bool TargetLowering::isConstFalseVal(SDValue N) const {
3918   if (!N)
3919     return false;
3920 
3921   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3922   if (!CN) {
3923     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3924     if (!BV)
3925       return false;
3926 
3927     // Only interested in constant splats, we don't care about undef
3928     // elements in identifying boolean constants and getConstantSplatNode
3929     // returns NULL if all ops are undef;
3930     CN = BV->getConstantSplatNode();
3931     if (!CN)
3932       return false;
3933   }
3934 
3935   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3936     return !CN->getAPIntValue()[0];
3937 
3938   return CN->isZero();
3939 }
3940 
3941 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3942                                        bool SExt) const {
3943   if (VT == MVT::i1)
3944     return N->isOne();
3945 
3946   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3947   switch (Cnt) {
3948   case TargetLowering::ZeroOrOneBooleanContent:
3949     // An extended value of 1 is always true, unless its original type is i1,
3950     // in which case it will be sign extended to -1.
3951     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3952   case TargetLowering::UndefinedBooleanContent:
3953   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3954     return N->isAllOnes() && SExt;
3955   }
3956   llvm_unreachable("Unexpected enumeration.");
3957 }
3958 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize the AND onto the LHS so the matching below only needs to
  // inspect N0.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only integer [in]equality comparisons involving an AND are handled here.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // it's liable to create an infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
4053 
4054 /// There are multiple IR patterns that could be checking whether certain
4055 /// truncation of a signed number would be lossy or not. The pattern which is
4056 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
4057 /// We are looking for the following pattern: (KeptBits is a constant)
4058 ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4059 /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4060 /// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
4061 /// We will unfold it into the natural trunc+sext pattern:
4062 ///   ((%x << C) a>> C) dstcond %x
4063 /// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned range check to an eq/ne form. The inclusive variants
  // (ULE/UGT) are canonicalized to their exclusive counterparts by bumping
  // the setcc constant by one.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    // Negating both constants mirrors the pattern, so the comparison must be
    // inverted to compensate.
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into:  sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      ISD::SIGN_EXTEND_INREG, DL, XVT, X,
      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
}
4145 
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'.
  // Note: the lambda reads the outer 'X', which must already hold the other
  // AND operand when Match is invoked.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    // The rewritten expression shifts X the opposite way the constant was
    // shifted.
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    // Let the target decide whether this hoisting is profitable.
    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
4214 
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  // From here on the comparison RHS must be the binop's second operand.
  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // Only SUB remains. The shift would not be valid if the operands are
  // boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
4255 
/// Fold comparisons of a ctpop against small constants into cheaper bit
/// tricks, e.g. power-of-two-or-zero tests via repeated (x & (x-1)).
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // (ctpop of an N-bit value needs only log2(N)+1 result bits, so a wider
  // truncation is lossless.)
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
      return SDValue();

    // Each clear-lowest-set-bit pass costs an add+and; bail out when the
    // constant would require more passes than the target tolerates.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    // Passes = number of times to clear the lowest set bit before the
    // zero-test decides the comparison.
    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);

    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
    // check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
  }

  return SDValue();
}
4329 
4330 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4331                                    ISD::CondCode Cond, const SDLoc &dl,
4332                                    SelectionDAG &DAG) {
4333   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4334     return SDValue();
4335 
4336   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4337   if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4338     return SDValue();
4339 
4340   auto getRotateSource = [](SDValue X) {
4341     if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4342       return X.getOperand(0);
4343     return SDValue();
4344   };
4345 
4346   // Peek through a rotated value compared against 0 or -1:
4347   // (rot X, Y) == 0/-1 --> X == 0/-1
4348   // (rot X, Y) != 0/-1 --> X != 0/-1
4349   if (SDValue R = getRotateSource(N0))
4350     return DAG.getSetCC(dl, VT, R, N1, Cond);
4351 
4352   // Peek through an 'or' of a rotated value compared against 0:
4353   // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4354   // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4355   //
4356   // TODO: Add the 'and' with -1 sibling.
4357   // TODO: Recurse through a series of 'or' ops to find the rotate.
4358   EVT OpVT = N0.getValueType();
4359   if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4360     if (SDValue R = getRotateSource(N0.getOperand(0))) {
4361       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4362       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4363     }
4364     if (SDValue R = getRotateSource(N0.getOperand(1))) {
4365       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4366       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4367     }
4368   }
4369 
4370   return SDValue();
4371 }
4372 
4373 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4374                                         ISD::CondCode Cond, const SDLoc &dl,
4375                                         SelectionDAG &DAG) {
4376   // If we are testing for all-bits-clear, we might be able to do that with
4377   // less shifting since bit-order does not matter.
4378   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4379     return SDValue();
4380 
4381   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4382   if (!C1 || !C1->isZero())
4383     return SDValue();
4384 
4385   if (!N0.hasOneUse() ||
4386       (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4387     return SDValue();
4388 
4389   unsigned BitWidth = N0.getScalarValueSizeInBits();
4390   auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4391   if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4392     return SDValue();
4393 
4394   // Canonicalize fshr as fshl to reduce pattern-matching.
4395   unsigned ShAmt = ShAmtC->getZExtValue();
4396   if (N0.getOpcode() == ISD::FSHR)
4397     ShAmt = BitWidth - ShAmt;
4398 
4399   // Match an 'or' with a specific operand 'Other' in either commuted variant.
4400   SDValue X, Y;
4401   auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4402     if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4403       return false;
4404     if (Or.getOperand(0) == Other) {
4405       X = Or.getOperand(0);
4406       Y = Or.getOperand(1);
4407       return true;
4408     }
4409     if (Or.getOperand(1) == Other) {
4410       X = Or.getOperand(1);
4411       Y = Or.getOperand(0);
4412       return true;
4413     }
4414     return false;
4415   };
4416 
4417   EVT OpVT = N0.getValueType();
4418   EVT ShAmtVT = N0.getOperand(2).getValueType();
4419   SDValue F0 = N0.getOperand(0);
4420   SDValue F1 = N0.getOperand(1);
4421   if (matchOr(F0, F1)) {
4422     // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4423     SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4424     SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4425     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4426     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4427   }
4428   if (matchOr(F1, F0)) {
4429     // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4430     SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4431     SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4432     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4433     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4434   }
4435 
4436   return SDValue();
4437 }
4438 
4439 /// Try to simplify a setcc built with the specified operands and cc. If it is
4440 /// unable to simplify it, return a null SDValue.
4441 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4442                                       ISD::CondCode Cond, bool foldBooleans,
4443                                       DAGCombinerInfo &DCI,
4444                                       const SDLoc &dl) const {
4445   SelectionDAG &DAG = DCI.DAG;
4446   const DataLayout &Layout = DAG.getDataLayout();
4447   EVT OpVT = N0.getValueType();
4448   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4449 
4450   // Constant fold or commute setcc.
4451   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4452     return Fold;
4453 
4454   bool N0ConstOrSplat =
4455       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4456   bool N1ConstOrSplat =
4457       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4458 
4459   // Canonicalize toward having the constant on the RHS.
4460   // TODO: Handle non-splat vector constants. All undef causes trouble.
4461   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4462   // infinite loop here when we encounter one.
4463   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4464   if (N0ConstOrSplat && !N1ConstOrSplat &&
4465       (DCI.isBeforeLegalizeOps() ||
4466        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4467     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4468 
4469   // If we have a subtract with the same 2 non-constant operands as this setcc
4470   // -- but in reverse order -- then try to commute the operands of this setcc
4471   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4472   // instruction on some targets.
4473   if (!N0ConstOrSplat && !N1ConstOrSplat &&
4474       (DCI.isBeforeLegalizeOps() ||
4475        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4476       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4477       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4478     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4479 
4480   if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4481     return V;
4482 
4483   if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4484     return V;
4485 
4486   if (auto *N1C = isConstOrConstSplat(N1)) {
4487     const APInt &C1 = N1C->getAPIntValue();
4488 
4489     // Optimize some CTPOP cases.
4490     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4491       return V;
4492 
4493     // For equality to 0 of a no-wrap multiply, decompose and test each op:
4494     // X * Y == 0 --> (X == 0) || (Y == 0)
4495     // X * Y != 0 --> (X != 0) && (Y != 0)
4496     // TODO: This bails out if minsize is set, but if the target doesn't have a
4497     //       single instruction multiply for this type, it would likely be
4498     //       smaller to decompose.
4499     if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4500         N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4501         (N0->getFlags().hasNoUnsignedWrap() ||
4502          N0->getFlags().hasNoSignedWrap()) &&
4503         !Attr.hasFnAttr(Attribute::MinSize)) {
4504       SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4505       SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4506       unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4507       return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4508     }
4509 
4510     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4511     // equality comparison, then we're just comparing whether X itself is
4512     // zero.
4513     if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4514         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4515         llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4516       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4517         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4518             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4519           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4520             // (srl (ctlz x), 5) == 0  -> X != 0
4521             // (srl (ctlz x), 5) != 1  -> X != 0
4522             Cond = ISD::SETNE;
4523           } else {
4524             // (srl (ctlz x), 5) != 0  -> X == 0
4525             // (srl (ctlz x), 5) == 1  -> X == 0
4526             Cond = ISD::SETEQ;
4527           }
4528           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4529           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4530                               Cond);
4531         }
4532       }
4533     }
4534   }
4535 
4536   // FIXME: Support vectors.
4537   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4538     const APInt &C1 = N1C->getAPIntValue();
4539 
4540     // (zext x) == C --> x == (trunc C)
4541     // (sext x) == C --> x == (trunc C)
4542     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4543         DCI.isBeforeLegalize() && N0->hasOneUse()) {
4544       unsigned MinBits = N0.getValueSizeInBits();
4545       SDValue PreExt;
4546       bool Signed = false;
4547       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4548         // ZExt
4549         MinBits = N0->getOperand(0).getValueSizeInBits();
4550         PreExt = N0->getOperand(0);
4551       } else if (N0->getOpcode() == ISD::AND) {
4552         // DAGCombine turns costly ZExts into ANDs
4553         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4554           if ((C->getAPIntValue()+1).isPowerOf2()) {
4555             MinBits = C->getAPIntValue().countr_one();
4556             PreExt = N0->getOperand(0);
4557           }
4558       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4559         // SExt
4560         MinBits = N0->getOperand(0).getValueSizeInBits();
4561         PreExt = N0->getOperand(0);
4562         Signed = true;
4563       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4564         // ZEXTLOAD / SEXTLOAD
4565         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4566           MinBits = LN0->getMemoryVT().getSizeInBits();
4567           PreExt = N0;
4568         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4569           Signed = true;
4570           MinBits = LN0->getMemoryVT().getSizeInBits();
4571           PreExt = N0;
4572         }
4573       }
4574 
4575       // Figure out how many bits we need to preserve this constant.
4576       unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4577 
4578       // Make sure we're not losing bits from the constant.
4579       if (MinBits > 0 &&
4580           MinBits < C1.getBitWidth() &&
4581           MinBits >= ReqdBits) {
4582         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4583         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4584           // Will get folded away.
4585           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4586           if (MinBits == 1 && C1 == 1)
4587             // Invert the condition.
4588             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4589                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4590           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4591           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4592         }
4593 
4594         // If truncating the setcc operands is not desirable, we can still
4595         // simplify the expression in some cases:
4596         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4597         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4598         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4599         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4600         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4601         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4602         SDValue TopSetCC = N0->getOperand(0);
4603         unsigned N0Opc = N0->getOpcode();
4604         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4605         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4606             TopSetCC.getOpcode() == ISD::SETCC &&
4607             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4608             (isConstFalseVal(N1) ||
4609              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4610 
4611           bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4612                          (!N1C->isZero() && Cond == ISD::SETNE);
4613 
4614           if (!Inverse)
4615             return TopSetCC;
4616 
4617           ISD::CondCode InvCond = ISD::getSetCCInverse(
4618               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4619               TopSetCC.getOperand(0).getValueType());
4620           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4621                                       TopSetCC.getOperand(1),
4622                                       InvCond);
4623         }
4624       }
4625     }
4626 
4627     // If the LHS is '(and load, const)', the RHS is 0, the test is for
4628     // equality or unsigned, and all 1 bits of the const are in the same
4629     // partial word, see if we can shorten the load.
4630     if (DCI.isBeforeLegalize() &&
4631         !ISD::isSignedIntSetCC(Cond) &&
4632         N0.getOpcode() == ISD::AND && C1 == 0 &&
4633         N0.getNode()->hasOneUse() &&
4634         isa<LoadSDNode>(N0.getOperand(0)) &&
4635         N0.getOperand(0).getNode()->hasOneUse() &&
4636         isa<ConstantSDNode>(N0.getOperand(1))) {
4637       auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4638       APInt bestMask;
4639       unsigned bestWidth = 0, bestOffset = 0;
4640       if (Lod->isSimple() && Lod->isUnindexed() &&
4641           (Lod->getMemoryVT().isByteSized() ||
4642            isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4643         unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4644         unsigned origWidth = N0.getValueSizeInBits();
4645         unsigned maskWidth = origWidth;
4646         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4647         // 8 bits, but have to be careful...
4648         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4649           origWidth = Lod->getMemoryVT().getSizeInBits();
4650         const APInt &Mask = N0.getConstantOperandAPInt(1);
4651         // Only consider power-of-2 widths (and at least one byte) as candiates
4652         // for the narrowed load.
4653         for (unsigned width = 8; width < origWidth; width *= 2) {
4654           EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4655           if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4656             continue;
4657           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4658           // Avoid accessing any padding here for now (we could use memWidth
4659           // instead of origWidth here otherwise).
4660           unsigned maxOffset = origWidth - width;
4661           for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4662             if (Mask.isSubsetOf(newMask)) {
4663               unsigned ptrOffset =
4664                   Layout.isLittleEndian() ? offset : memWidth - width - offset;
4665               unsigned IsFast = 0;
4666               Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4667               if (allowsMemoryAccess(
4668                       *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4669                       NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4670                   IsFast) {
4671                 bestOffset = ptrOffset / 8;
4672                 bestMask = Mask.lshr(offset);
4673                 bestWidth = width;
4674                 break;
4675               }
4676             }
4677             newMask <<= 8;
4678           }
4679           if (bestWidth)
4680             break;
4681         }
4682       }
4683       if (bestWidth) {
4684         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4685         SDValue Ptr = Lod->getBasePtr();
4686         if (bestOffset != 0)
4687           Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4688         SDValue NewLoad =
4689             DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4690                         Lod->getPointerInfo().getWithOffset(bestOffset),
4691                         Lod->getOriginalAlign());
4692         SDValue And =
4693             DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4694                         DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4695         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4696       }
4697     }
4698 
4699     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4700     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4701       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4702 
4703       // If the comparison constant has bits in the upper part, the
4704       // zero-extended value could never match.
4705       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4706                                               C1.getBitWidth() - InSize))) {
4707         switch (Cond) {
4708         case ISD::SETUGT:
4709         case ISD::SETUGE:
4710         case ISD::SETEQ:
4711           return DAG.getConstant(0, dl, VT);
4712         case ISD::SETULT:
4713         case ISD::SETULE:
4714         case ISD::SETNE:
4715           return DAG.getConstant(1, dl, VT);
4716         case ISD::SETGT:
4717         case ISD::SETGE:
4718           // True if the sign bit of C1 is set.
4719           return DAG.getConstant(C1.isNegative(), dl, VT);
4720         case ISD::SETLT:
4721         case ISD::SETLE:
4722           // True if the sign bit of C1 isn't set.
4723           return DAG.getConstant(C1.isNonNegative(), dl, VT);
4724         default:
4725           break;
4726         }
4727       }
4728 
4729       // Otherwise, we can perform the comparison with the low bits.
4730       switch (Cond) {
4731       case ISD::SETEQ:
4732       case ISD::SETNE:
4733       case ISD::SETUGT:
4734       case ISD::SETUGE:
4735       case ISD::SETULT:
4736       case ISD::SETULE: {
4737         EVT newVT = N0.getOperand(0).getValueType();
4738         // FIXME: Should use isNarrowingProfitable.
4739         if (DCI.isBeforeLegalizeOps() ||
4740             (isOperationLegal(ISD::SETCC, newVT) &&
4741              isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4742              isTypeDesirableForOp(ISD::SETCC, newVT))) {
4743           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4744           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4745 
4746           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4747                                           NewConst, Cond);
4748           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4749         }
4750         break;
4751       }
4752       default:
4753         break; // todo, be more careful with signed comparisons
4754       }
4755     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4756                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4757                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4758                                       OpVT)) {
4759       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4760       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4761       EVT ExtDstTy = N0.getValueType();
4762       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4763 
4764       // If the constant doesn't fit into the number of bits for the source of
4765       // the sign extension, it is impossible for both sides to be equal.
4766       if (C1.getSignificantBits() > ExtSrcTyBits)
4767         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4768 
4769       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4770              ExtDstTy != ExtSrcTy && "Unexpected types!");
4771       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4772       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4773                                    DAG.getConstant(Imm, dl, ExtDstTy));
4774       if (!DCI.isCalledByLegalizer())
4775         DCI.AddToWorklist(ZextOp.getNode());
4776       // Otherwise, make this a use of a zext.
4777       return DAG.getSetCC(dl, VT, ZextOp,
4778                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4779     } else if ((N1C->isZero() || N1C->isOne()) &&
4780                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4781       // SETCC (X), [0|1], [EQ|NE]  -> X if X is known 0/1. i1 types are
4782       // excluded as they are handled below whilst checking for foldBooleans.
4783       if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4784           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4785           (N0.getValueType() == MVT::i1 ||
4786            getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4787           DAG.MaskedValueIsZero(
4788               N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4789         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4790         if (TrueWhenTrue)
4791           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4792         // Invert the condition.
4793         if (N0.getOpcode() == ISD::SETCC) {
4794           ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4795           CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4796           if (DCI.isBeforeLegalizeOps() ||
4797               isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4798             return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4799         }
4800       }
4801 
4802       if ((N0.getOpcode() == ISD::XOR ||
4803            (N0.getOpcode() == ISD::AND &&
4804             N0.getOperand(0).getOpcode() == ISD::XOR &&
4805             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4806           isOneConstant(N0.getOperand(1))) {
4807         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4808         // can only do this if the top bits are known zero.
4809         unsigned BitWidth = N0.getValueSizeInBits();
4810         if (DAG.MaskedValueIsZero(N0,
4811                                   APInt::getHighBitsSet(BitWidth,
4812                                                         BitWidth-1))) {
4813           // Okay, get the un-inverted input value.
4814           SDValue Val;
4815           if (N0.getOpcode() == ISD::XOR) {
4816             Val = N0.getOperand(0);
4817           } else {
4818             assert(N0.getOpcode() == ISD::AND &&
4819                     N0.getOperand(0).getOpcode() == ISD::XOR);
4820             // ((X^1)&1)^1 -> X & 1
4821             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4822                               N0.getOperand(0).getOperand(0),
4823                               N0.getOperand(1));
4824           }
4825 
4826           return DAG.getSetCC(dl, VT, Val, N1,
4827                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4828         }
4829       } else if (N1C->isOne()) {
4830         SDValue Op0 = N0;
4831         if (Op0.getOpcode() == ISD::TRUNCATE)
4832           Op0 = Op0.getOperand(0);
4833 
4834         if ((Op0.getOpcode() == ISD::XOR) &&
4835             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4836             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4837           SDValue XorLHS = Op0.getOperand(0);
4838           SDValue XorRHS = Op0.getOperand(1);
4839           // Ensure that the input setccs return an i1 type or 0/1 value.
4840           if (Op0.getValueType() == MVT::i1 ||
4841               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4842                       ZeroOrOneBooleanContent &&
4843                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4844                         ZeroOrOneBooleanContent)) {
4845             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4846             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4847             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4848           }
4849         }
4850         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4851           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4852           if (Op0.getValueType().bitsGT(VT))
4853             Op0 = DAG.getNode(ISD::AND, dl, VT,
4854                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4855                           DAG.getConstant(1, dl, VT));
4856           else if (Op0.getValueType().bitsLT(VT))
4857             Op0 = DAG.getNode(ISD::AND, dl, VT,
4858                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4859                         DAG.getConstant(1, dl, VT));
4860 
4861           return DAG.getSetCC(dl, VT, Op0,
4862                               DAG.getConstant(0, dl, Op0.getValueType()),
4863                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4864         }
4865         if (Op0.getOpcode() == ISD::AssertZext &&
4866             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4867           return DAG.getSetCC(dl, VT, Op0,
4868                               DAG.getConstant(0, dl, Op0.getValueType()),
4869                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4870       }
4871     }
4872 
4873     // Given:
4874     //   icmp eq/ne (urem %x, %y), 0
4875     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4876     //   icmp eq/ne %x, 0
4877     if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4878         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4879       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4880       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4881       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4882         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4883     }
4884 
4885     // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4886     //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4887     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4888         N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4889         N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4890         N1C->isAllOnes()) {
4891       return DAG.getSetCC(dl, VT, N0.getOperand(0),
4892                           DAG.getConstant(0, dl, OpVT),
4893                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4894     }
4895 
4896     if (SDValue V =
4897             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4898       return V;
4899   }
4900 
4901   // These simplifications apply to splat vectors as well.
4902   // TODO: Handle more splat vector cases.
4903   if (auto *N1C = isConstOrConstSplat(N1)) {
4904     const APInt &C1 = N1C->getAPIntValue();
4905 
4906     APInt MinVal, MaxVal;
4907     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4908     if (ISD::isSignedIntSetCC(Cond)) {
4909       MinVal = APInt::getSignedMinValue(OperandBitSize);
4910       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4911     } else {
4912       MinVal = APInt::getMinValue(OperandBitSize);
4913       MaxVal = APInt::getMaxValue(OperandBitSize);
4914     }
4915 
4916     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4917     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4918       // X >= MIN --> true
4919       if (C1 == MinVal)
4920         return DAG.getBoolConstant(true, dl, VT, OpVT);
4921 
4922       if (!VT.isVector()) { // TODO: Support this for vectors.
4923         // X >= C0 --> X > (C0 - 1)
4924         APInt C = C1 - 1;
4925         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4926         if ((DCI.isBeforeLegalizeOps() ||
4927              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4928             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4929                                   isLegalICmpImmediate(C.getSExtValue())))) {
4930           return DAG.getSetCC(dl, VT, N0,
4931                               DAG.getConstant(C, dl, N1.getValueType()),
4932                               NewCC);
4933         }
4934       }
4935     }
4936 
4937     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4938       // X <= MAX --> true
4939       if (C1 == MaxVal)
4940         return DAG.getBoolConstant(true, dl, VT, OpVT);
4941 
4942       // X <= C0 --> X < (C0 + 1)
4943       if (!VT.isVector()) { // TODO: Support this for vectors.
4944         APInt C = C1 + 1;
4945         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4946         if ((DCI.isBeforeLegalizeOps() ||
4947              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4948             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4949                                   isLegalICmpImmediate(C.getSExtValue())))) {
4950           return DAG.getSetCC(dl, VT, N0,
4951                               DAG.getConstant(C, dl, N1.getValueType()),
4952                               NewCC);
4953         }
4954       }
4955     }
4956 
4957     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4958       if (C1 == MinVal)
4959         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4960 
4961       // TODO: Support this for vectors after legalize ops.
4962       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4963         // Canonicalize setlt X, Max --> setne X, Max
4964         if (C1 == MaxVal)
4965           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4966 
4967         // If we have setult X, 1, turn it into seteq X, 0
4968         if (C1 == MinVal+1)
4969           return DAG.getSetCC(dl, VT, N0,
4970                               DAG.getConstant(MinVal, dl, N0.getValueType()),
4971                               ISD::SETEQ);
4972       }
4973     }
4974 
4975     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4976       if (C1 == MaxVal)
4977         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4978 
4979       // TODO: Support this for vectors after legalize ops.
4980       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4981         // Canonicalize setgt X, Min --> setne X, Min
4982         if (C1 == MinVal)
4983           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4984 
4985         // If we have setugt X, Max-1, turn it into seteq X, Max
4986         if (C1 == MaxVal-1)
4987           return DAG.getSetCC(dl, VT, N0,
4988                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
4989                               ISD::SETEQ);
4990       }
4991     }
4992 
4993     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4994       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
4995       if (C1.isZero())
4996         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4997                 VT, N0, N1, Cond, DCI, dl))
4998           return CC;
4999 
5000       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5001       // For example, when high 32-bits of i64 X are known clear:
5002       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
5003       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
5004       bool CmpZero = N1C->isZero();
5005       bool CmpNegOne = N1C->isAllOnes();
5006       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5007         // Match or(lo,shl(hi,bw/2)) pattern.
5008         auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5009           unsigned EltBits = V.getScalarValueSizeInBits();
5010           if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5011             return false;
5012           SDValue LHS = V.getOperand(0);
5013           SDValue RHS = V.getOperand(1);
5014           APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5015           // Unshifted element must have zero upperbits.
5016           if (RHS.getOpcode() == ISD::SHL &&
5017               isa<ConstantSDNode>(RHS.getOperand(1)) &&
5018               RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5019               DAG.MaskedValueIsZero(LHS, HiBits)) {
5020             Lo = LHS;
5021             Hi = RHS.getOperand(0);
5022             return true;
5023           }
5024           if (LHS.getOpcode() == ISD::SHL &&
5025               isa<ConstantSDNode>(LHS.getOperand(1)) &&
5026               LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5027               DAG.MaskedValueIsZero(RHS, HiBits)) {
5028             Lo = RHS;
5029             Hi = LHS.getOperand(0);
5030             return true;
5031           }
5032           return false;
5033         };
5034 
5035         auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5036           unsigned EltBits = N0.getScalarValueSizeInBits();
5037           unsigned HalfBits = EltBits / 2;
5038           APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5039           SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5040           SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5041           SDValue NewN0 =
5042               DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5043           SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5044           return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5045         };
5046 
5047         SDValue Lo, Hi;
5048         if (IsConcat(N0, Lo, Hi))
5049           return MergeConcat(Lo, Hi);
5050 
5051         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5052           SDValue Lo0, Lo1, Hi0, Hi1;
5053           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5054               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5055             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5056                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5057           }
5058         }
5059       }
5060     }
5061 
5062     // If we have "setcc X, C0", check to see if we can shrink the immediate
5063     // by changing cc.
5064     // TODO: Support this for vectors after legalize ops.
5065     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5066       // SETUGT X, SINTMAX  -> SETLT X, 0
5067       // SETUGE X, SINTMIN -> SETLT X, 0
5068       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5069           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5070         return DAG.getSetCC(dl, VT, N0,
5071                             DAG.getConstant(0, dl, N1.getValueType()),
5072                             ISD::SETLT);
5073 
5074       // SETULT X, SINTMIN  -> SETGT X, -1
5075       // SETULE X, SINTMAX  -> SETGT X, -1
5076       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5077           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5078         return DAG.getSetCC(dl, VT, N0,
5079                             DAG.getAllOnesConstant(dl, N1.getValueType()),
5080                             ISD::SETGT);
5081     }
5082   }
5083 
5084   // Back to non-vector simplifications.
5085   // TODO: Can we do these for vector splats?
5086   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5087     const APInt &C1 = N1C->getAPIntValue();
5088     EVT ShValTy = N0.getValueType();
5089 
5090     // Fold bit comparisons when we can. This will result in an
5091     // incorrect value when boolean false is negative one, unless
5092     // the bitsize is 1 in which case the false value is the same
5093     // in practice regardless of the representation.
5094     if ((VT.getSizeInBits() == 1 ||
5095          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5096         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5097         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5098         N0.getOpcode() == ISD::AND) {
5099       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5100         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
5101           // Perform the xform if the AND RHS is a single bit.
5102           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5103           if (AndRHS->getAPIntValue().isPowerOf2() &&
5104               !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5105             return DAG.getNode(
5106                 ISD::TRUNCATE, dl, VT,
5107                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5108                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5109           }
5110         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5111           // (X & 8) == 8  -->  (X & 8) >> 3
5112           // Perform the xform if C1 is a single bit.
5113           unsigned ShCt = C1.logBase2();
5114           if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5115             return DAG.getNode(
5116                 ISD::TRUNCATE, dl, VT,
5117                 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5118                             DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5119           }
5120         }
5121       }
5122     }
5123 
5124     if (C1.getSignificantBits() <= 64 &&
5125         !isLegalICmpImmediate(C1.getSExtValue())) {
5126       // (X & -256) == 256 -> (X >> 8) == 1
5127       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5128           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5129         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5130           const APInt &AndRHSC = AndRHS->getAPIntValue();
5131           if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5132             unsigned ShiftBits = AndRHSC.countr_zero();
5133             if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5134               SDValue Shift = DAG.getNode(
5135                   ISD::SRL, dl, ShValTy, N0.getOperand(0),
5136                   DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5137               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5138               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5139             }
5140           }
5141         }
5142       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5143                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5144         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5145         // X <  0x100000000 -> (X >> 32) <  1
5146         // X >= 0x100000000 -> (X >> 32) >= 1
5147         // X <= 0x0ffffffff -> (X >> 32) <  1
5148         // X >  0x0ffffffff -> (X >> 32) >= 1
5149         unsigned ShiftBits;
5150         APInt NewC = C1;
5151         ISD::CondCode NewCond = Cond;
5152         if (AdjOne) {
5153           ShiftBits = C1.countr_one();
5154           NewC = NewC + 1;
5155           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5156         } else {
5157           ShiftBits = C1.countr_zero();
5158         }
5159         NewC.lshrInPlace(ShiftBits);
5160         if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5161             isLegalICmpImmediate(NewC.getSExtValue()) &&
5162             !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5163           SDValue Shift =
5164               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5165                           DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5166           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5167           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5168         }
5169       }
5170     }
5171   }
5172 
5173   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5174     auto *CFP = cast<ConstantFPSDNode>(N1);
5175     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5176 
5177     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
5178     // constant if knowing that the operand is non-nan is enough.  We prefer to
5179     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5180     // materialize 0.0.
5181     if (Cond == ISD::SETO || Cond == ISD::SETUO)
5182       return DAG.getSetCC(dl, VT, N0, N0, Cond);
5183 
5184     // setcc (fneg x), C -> setcc swap(pred) x, -C
5185     if (N0.getOpcode() == ISD::FNEG) {
5186       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5187       if (DCI.isBeforeLegalizeOps() ||
5188           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5189         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5190         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5191       }
5192     }
5193 
5194     // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5195     if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5196         !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5197       bool IsFabs = N0.getOpcode() == ISD::FABS;
5198       SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5199       if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5200         FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5201                                              : (IsFabs ? fcInf : fcPosInf);
5202         if (Cond == ISD::SETUEQ)
5203           Flag |= fcNan;
5204         return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5205                            DAG.getTargetConstant(Flag, dl, MVT::i32));
5206       }
5207     }
5208 
5209     // If the condition is not legal, see if we can find an equivalent one
5210     // which is legal.
5211     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5212       // If the comparison was an awkward floating-point == or != and one of
5213       // the comparison operands is infinity or negative infinity, convert the
5214       // condition to a less-awkward <= or >=.
5215       if (CFP->getValueAPF().isInfinity()) {
5216         bool IsNegInf = CFP->getValueAPF().isNegative();
5217         ISD::CondCode NewCond = ISD::SETCC_INVALID;
5218         switch (Cond) {
5219         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5220         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5221         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5222         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5223         default: break;
5224         }
5225         if (NewCond != ISD::SETCC_INVALID &&
5226             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5227           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5228       }
5229     }
5230   }
5231 
5232   if (N0 == N1) {
5233     // The sext(setcc()) => setcc() optimization relies on the appropriate
5234     // constant being emitted.
5235     assert(!N0.getValueType().isInteger() &&
5236            "Integer types should be handled by FoldSetCC");
5237 
5238     bool EqTrue = ISD::isTrueWhenEqual(Cond);
5239     unsigned UOF = ISD::getUnorderedFlavor(Cond);
5240     if (UOF == 2) // FP operators that are undefined on NaNs.
5241       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5242     if (UOF == unsigned(EqTrue))
5243       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5244     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
5245     // if it is not already.
5246     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5247     if (NewCond != Cond &&
5248         (DCI.isBeforeLegalizeOps() ||
5249                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5250       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5251   }
5252 
5253   // ~X > ~Y --> Y > X
5254   // ~X < ~Y --> Y < X
5255   // ~X < C --> X > ~C
5256   // ~X > C --> X < ~C
5257   if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5258       N0.getValueType().isInteger()) {
5259     if (isBitwiseNot(N0)) {
5260       if (isBitwiseNot(N1))
5261         return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5262 
5263       if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5264           !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5265         SDValue Not = DAG.getNOT(dl, N1, OpVT);
5266         return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5267       }
5268     }
5269   }
5270 
5271   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5272       N0.getValueType().isInteger()) {
5273     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5274         N0.getOpcode() == ISD::XOR) {
5275       // Simplify (X+Y) == (X+Z) -->  Y == Z
5276       if (N0.getOpcode() == N1.getOpcode()) {
5277         if (N0.getOperand(0) == N1.getOperand(0))
5278           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5279         if (N0.getOperand(1) == N1.getOperand(1))
5280           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5281         if (isCommutativeBinOp(N0.getOpcode())) {
5282           // If X op Y == Y op X, try other combinations.
5283           if (N0.getOperand(0) == N1.getOperand(1))
5284             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5285                                 Cond);
5286           if (N0.getOperand(1) == N1.getOperand(0))
5287             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5288                                 Cond);
5289         }
5290       }
5291 
5292       // If RHS is a legal immediate value for a compare instruction, we need
5293       // to be careful about increasing register pressure needlessly.
5294       bool LegalRHSImm = false;
5295 
5296       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5297         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5298           // Turn (X+C1) == C2 --> X == C2-C1
5299           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5300             return DAG.getSetCC(
5301                 dl, VT, N0.getOperand(0),
5302                 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5303                                 dl, N0.getValueType()),
5304                 Cond);
5305 
5306           // Turn (X^C1) == C2 --> X == C1^C2
5307           if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5308             return DAG.getSetCC(
5309                 dl, VT, N0.getOperand(0),
5310                 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5311                                 dl, N0.getValueType()),
5312                 Cond);
5313         }
5314 
5315         // Turn (C1-X) == C2 --> X == C1-C2
5316         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5317           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5318             return DAG.getSetCC(
5319                 dl, VT, N0.getOperand(1),
5320                 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5321                                 dl, N0.getValueType()),
5322                 Cond);
5323 
5324         // Could RHSC fold directly into a compare?
5325         if (RHSC->getValueType(0).getSizeInBits() <= 64)
5326           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5327       }
5328 
5329       // (X+Y) == X --> Y == 0 and similar folds.
5330       // Don't do this if X is an immediate that can fold into a cmp
5331       // instruction and X+Y has other uses. It could be an induction variable
5332       // chain, and the transform would increase register pressure.
5333       if (!LegalRHSImm || N0.hasOneUse())
5334         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5335           return V;
5336     }
5337 
5338     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5339         N1.getOpcode() == ISD::XOR)
5340       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5341         return V;
5342 
5343     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5344       return V;
5345   }
5346 
5347   // Fold remainder of division by a constant.
5348   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5349       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5350     // When division is cheap or optimizing for minimum size,
5351     // fall through to DIVREM creation by skipping this fold.
5352     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5353       if (N0.getOpcode() == ISD::UREM) {
5354         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5355           return Folded;
5356       } else if (N0.getOpcode() == ISD::SREM) {
5357         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5358           return Folded;
5359       }
5360     }
5361   }
5362 
5363   // Fold away ALL boolean setcc's.
5364   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5365     SDValue Temp;
5366     switch (Cond) {
5367     default: llvm_unreachable("Unknown integer setcc!");
5368     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
5369       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5370       N0 = DAG.getNOT(dl, Temp, OpVT);
5371       if (!DCI.isCalledByLegalizer())
5372         DCI.AddToWorklist(Temp.getNode());
5373       break;
5374     case ISD::SETNE:  // X != Y   -->  (X^Y)
5375       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5376       break;
5377     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
5378     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
5379       Temp = DAG.getNOT(dl, N0, OpVT);
5380       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5381       if (!DCI.isCalledByLegalizer())
5382         DCI.AddToWorklist(Temp.getNode());
5383       break;
5384     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
5385     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
5386       Temp = DAG.getNOT(dl, N1, OpVT);
5387       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5388       if (!DCI.isCalledByLegalizer())
5389         DCI.AddToWorklist(Temp.getNode());
5390       break;
5391     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
5392     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
5393       Temp = DAG.getNOT(dl, N0, OpVT);
5394       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5395       if (!DCI.isCalledByLegalizer())
5396         DCI.AddToWorklist(Temp.getNode());
5397       break;
5398     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
5399     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
5400       Temp = DAG.getNOT(dl, N1, OpVT);
5401       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5402       break;
5403     }
5404     if (VT.getScalarType() != MVT::i1) {
5405       if (!DCI.isCalledByLegalizer())
5406         DCI.AddToWorklist(N0.getNode());
5407       // FIXME: If running after legalize, we probably can't do this.
5408       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5409       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5410     }
5411     return N0;
5412   }
5413 
5414   // Could not fold it.
5415   return SDValue();
5416 }
5417 
5418 /// Returns true (and the GlobalValue and the offset) if the node is a
5419 /// GlobalAddress + offset.
5420 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5421                                     int64_t &Offset) const {
5422 
5423   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5424 
5425   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5426     GA = GASD->getGlobal();
5427     Offset += GASD->getOffset();
5428     return true;
5429   }
5430 
5431   if (N->getOpcode() == ISD::ADD) {
5432     SDValue N1 = N->getOperand(0);
5433     SDValue N2 = N->getOperand(1);
5434     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5435       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5436         Offset += V->getSExtValue();
5437         return true;
5438       }
5439     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5440       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5441         Offset += V->getSExtValue();
5442         return true;
5443       }
5444     }
5445   }
5446 
5447   return false;
5448 }
5449 
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: perform no target-specific combine; returning an
  // empty SDValue tells the caller the node was left unchanged.
  return SDValue();
}
5455 
5456 //===----------------------------------------------------------------------===//
5457 //  Inline Assembler Implementation Methods
5458 //===----------------------------------------------------------------------===//
5459 
5460 TargetLowering::ConstraintType
5461 TargetLowering::getConstraintType(StringRef Constraint) const {
5462   unsigned S = Constraint.size();
5463 
5464   if (S == 1) {
5465     switch (Constraint[0]) {
5466     default: break;
5467     case 'r':
5468       return C_RegisterClass;
5469     case 'm': // memory
5470     case 'o': // offsetable
5471     case 'V': // not offsetable
5472       return C_Memory;
5473     case 'p': // Address.
5474       return C_Address;
5475     case 'n': // Simple Integer
5476     case 'E': // Floating Point Constant
5477     case 'F': // Floating Point Constant
5478       return C_Immediate;
5479     case 'i': // Simple Integer or Relocatable Constant
5480     case 's': // Relocatable Constant
5481     case 'X': // Allow ANY value.
5482     case 'I': // Target registers.
5483     case 'J':
5484     case 'K':
5485     case 'L':
5486     case 'M':
5487     case 'N':
5488     case 'O':
5489     case 'P':
5490     case '<':
5491     case '>':
5492       return C_Other;
5493     }
5494   }
5495 
5496   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5497     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5498       return C_Memory;
5499     return C_Register;
5500   }
5501   return C_Unknown;
5502 }
5503 
5504 /// Try to replace an X constraint, which matches anything, with another that
5505 /// has more specific requirements based on the type of the corresponding
5506 /// operand.
5507 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5508   if (ConstraintVT.isInteger())
5509     return "r";
5510   if (ConstraintVT.isFloatingPoint())
5511     return "f"; // works for many targets
5512   return nullptr;
5513 }
5514 
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  // Default implementation: no target-specific output lowering; an empty
  // SDValue signals to the caller that the constraint was not handled here.
  return SDValue();
}
5520 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled by this generic
  // implementation; multi-letter constraints are left to target overrides.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    ConstantSDNode *C;
    // Running constant offset folded out of ADD/SUB layers below.
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A bare integer constant satisfies 'X', 'i' and 'n', but not 's',
      // which requires a relocatable symbol.
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        // An i1 value is instead extended per the target's boolean-contents
        // policy for MVT::i64.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic operands satisfy every letter here except 'n'.
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one ADD/SUB-with-constant layer into Offset and retry on the
      // remaining operand.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Unrecognized shape: leave Ops untouched (invalid operand).
      return;
    }
    break;
  }
  }
}
5599 
/// Hook allowing targets to append extra operands when lowering a target
/// intrinsic call; the default implementation adds nothing.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5603 
5604 std::pair<unsigned, const TargetRegisterClass *>
5605 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5606                                              StringRef Constraint,
5607                                              MVT VT) const {
5608   if (!Constraint.starts_with("{"))
5609     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5610   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5611 
5612   // Remove the braces from around the name.
5613   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5614 
5615   std::pair<unsigned, const TargetRegisterClass *> R =
5616       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5617 
5618   // Figure out which register class contains this reg.
5619   for (const TargetRegisterClass *RC : RI->regclasses()) {
5620     // If none of the value types for this register class are valid, we
5621     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5622     if (!isLegalRC(*RI, *RC))
5623       continue;
5624 
5625     for (const MCPhysReg &PR : *RC) {
5626       if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5627         std::pair<unsigned, const TargetRegisterClass *> S =
5628             std::make_pair(PR, RC);
5629 
5630         // If this register class has the requested value type, return it,
5631         // otherwise keep searching and return the first class found
5632         // if no other is found which explicitly has the requested type.
5633         if (RI->isTypeLegalForClass(*RC, VT))
5634           return S;
5635         if (!R.second)
5636           R = S;
5637       }
5638     }
5639   }
5640 
5641   return R;
5642 }
5643 
5644 //===----------------------------------------------------------------------===//
5645 // Constraint Selection.
5646 
5647 /// Return true of this is an input operand that is a matching constraint like
5648 /// "4".
5649 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5650   assert(!ConstraintCode.empty() && "No known constraint!");
5651   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5652 }
5653 
5654 /// If this is an input matching constraint, this method returns the output
5655 /// operand it matches.
5656 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5657   assert(!ConstraintCode.empty() && "No known constraint!");
5658   return atoi(ConstraintCode.c_str());
5659 }
5660 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
///
/// \param DL   Data layout used to compute value types for operands.
/// \param TRI  Register info used to validate tied-operand register classes.
/// \param Call The call/callbr whose called operand is the InlineAsm.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple direct outputs are modeled as a struct return; ResNo
        // selects this output's element of that struct.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      // Labels come from a callbr's indirect destinations, not from the
      // call arguments, so skip the shared operand handling below.
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // For indirect operands the pointee type is carried by the
        // elementtype attribute, not by the (opaque) pointer type.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One invalid operand disqualifies this whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Tied operands of different VTs are acceptable only if both map to
        // the same register class and agree on being int/FP-like.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5850 
5851 /// Return a number indicating our preference for chosing a type of constraint
5852 /// over another, for the purpose of sorting them. Immediates are almost always
5853 /// preferrable (when they can be emitted). A higher return value means a
5854 /// stronger preference for one constraint type relative to another.
5855 /// FIXME: We should prefer registers over memory but doing so may lead to
5856 /// unrecoverable register exhaustion later.
5857 /// https://github.com/llvm/llvm-project/issues/20571
5858 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5859   switch (CT) {
5860   case TargetLowering::C_Immediate:
5861   case TargetLowering::C_Other:
5862     return 4;
5863   case TargetLowering::C_Memory:
5864   case TargetLowering::C_Address:
5865     return 3;
5866   case TargetLowering::C_RegisterClass:
5867     return 2;
5868   case TargetLowering::C_Register:
5869     return 1;
5870   case TargetLowering::C_Unknown:
5871     return 0;
5872   }
5873   llvm_unreachable("Invalid constraint type");
5874 }
5875 
5876 /// Examine constraint type and operand type and determine a weight value.
5877 /// This object must already have been set up with the operand type
5878 /// and the current alternative constraint selected.
5879 TargetLowering::ConstraintWeight
5880   TargetLowering::getMultipleConstraintMatchWeight(
5881     AsmOperandInfo &info, int maIndex) const {
5882   InlineAsm::ConstraintCodeVector *rCodes;
5883   if (maIndex >= (int)info.multipleAlternatives.size())
5884     rCodes = &info.Codes;
5885   else
5886     rCodes = &info.multipleAlternatives[maIndex].Codes;
5887   ConstraintWeight BestWeight = CW_Invalid;
5888 
5889   // Loop over the options, keeping track of the most general one.
5890   for (const std::string &rCode : *rCodes) {
5891     ConstraintWeight weight =
5892         getSingleConstraintMatchWeight(info, rCode.c_str());
5893     if (weight > BestWeight)
5894       BestWeight = weight;
5895   }
5896 
5897   return BestWeight;
5898 }
5899 
5900 /// Examine constraint type and operand type and determine a weight value.
5901 /// This object must already have been set up with the operand type
5902 /// and the current alternative constraint selected.
5903 TargetLowering::ConstraintWeight
5904   TargetLowering::getSingleConstraintMatchWeight(
5905     AsmOperandInfo &info, const char *constraint) const {
5906   ConstraintWeight weight = CW_Invalid;
5907   Value *CallOperandVal = info.CallOperandVal;
5908     // If we don't have a value, we can't do a match,
5909     // but allow it at the lowest weight.
5910   if (!CallOperandVal)
5911     return CW_Default;
5912   // Look at the constraint type.
5913   switch (*constraint) {
5914     case 'i': // immediate integer.
5915     case 'n': // immediate integer with a known value.
5916       if (isa<ConstantInt>(CallOperandVal))
5917         weight = CW_Constant;
5918       break;
5919     case 's': // non-explicit intregal immediate.
5920       if (isa<GlobalValue>(CallOperandVal))
5921         weight = CW_Constant;
5922       break;
5923     case 'E': // immediate float if host format.
5924     case 'F': // immediate float.
5925       if (isa<ConstantFP>(CallOperandVal))
5926         weight = CW_Constant;
5927       break;
5928     case '<': // memory operand with autodecrement.
5929     case '>': // memory operand with autoincrement.
5930     case 'm': // memory operand.
5931     case 'o': // offsettable memory operand
5932     case 'V': // non-offsettable memory operand
5933       weight = CW_Memory;
5934       break;
5935     case 'r': // general register.
5936     case 'g': // general register, memory operand or immediate integer.
5937               // note: Clang converts "g" to "imr".
5938       if (CallOperandVal->getType()->isIntegerTy())
5939         weight = CW_Register;
5940       break;
5941     case 'X': // any operand.
5942   default:
5943     weight = CW_Default;
5944     break;
5945   }
5946   return weight;
5947 }
5948 
5949 /// If there are multiple different constraints that we could pick for this
5950 /// operand (e.g. "imr") try to pick the 'best' one.
5951 /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5952 /// into seven classes:
5953 ///    Register      -> one specific register
5954 ///    RegisterClass -> a group of regs
5955 ///    Memory        -> memory
5956 ///    Address       -> a symbolic memory reference
5957 ///    Immediate     -> immediate values
5958 ///    Other         -> magic values (such as "Flag Output Operands")
5959 ///    Unknown       -> something we don't recognize yet and can't handle
5960 /// Ideally, we would pick the most specific constraint possible: if we have
5961 /// something that fits into a register, we would pick it.  The problem here
5962 /// is that if we have something that could either be in a register or in
5963 /// memory that use of the register could cause selection of *other*
5964 /// operands to fail: they might only succeed if we pick memory.  Because of
5965 /// this the heuristic we use is:
5966 ///
5967 ///  1) If there is an 'other' constraint, and if the operand is valid for
5968 ///     that constraint, use it.  This makes us take advantage of 'i'
5969 ///     constraints when available.
5970 ///  2) Otherwise, pick the most general constraint present.  This prefers
5971 ///     'm' over 'r', for example.
5972 ///
5973 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5974     TargetLowering::AsmOperandInfo &OpInfo) const {
5975   ConstraintGroup Ret;
5976 
5977   Ret.reserve(OpInfo.Codes.size());
5978   for (StringRef Code : OpInfo.Codes) {
5979     TargetLowering::ConstraintType CType = getConstraintType(Code);
5980 
5981     // Indirect 'other' or 'immediate' constraints are not allowed.
5982     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5983                                CType == TargetLowering::C_Register ||
5984                                CType == TargetLowering::C_RegisterClass))
5985       continue;
5986 
5987     // Things with matching constraints can only be registers, per gcc
5988     // documentation.  This mainly affects "g" constraints.
5989     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5990       continue;
5991 
5992     Ret.emplace_back(Code, CType);
5993   }
5994 
5995   std::stable_sort(
5996       Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
5997         return getConstraintPiority(a.second) > getConstraintPiority(b.second);
5998       });
5999 
6000   return Ret;
6001 }
6002 
6003 /// If we have an immediate, see if we can lower it. Return true if we can,
6004 /// false otherwise.
6005 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6006                                      SDValue Op, SelectionDAG *DAG,
6007                                      const TargetLowering &TLI) {
6008 
6009   assert((P.second == TargetLowering::C_Other ||
6010           P.second == TargetLowering::C_Immediate) &&
6011          "need immediate or other");
6012 
6013   if (!Op.getNode())
6014     return false;
6015 
6016   std::vector<SDValue> ResultOps;
6017   TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6018   return !ResultOps.empty();
6019 }
6020 
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
///
/// \param OpInfo The operand whose constraint choice is filled in.
/// \param Op    The SDValue for the operand (may be null before ISel).
/// \param DAG   The DAG used when trying to lower immediate constraints.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    // Sort the candidates by preference (immediates/other first).
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    // Walk the leading immediate/'other' candidates: take the first one that
    // actually lowers for this operand. If none of them does (and nothing
    // non-immediate follows), fall back to the highest-preference candidate.
    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Code labels lower like immediates.
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6078 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element pattern: split the divisor into (odd part << trailing
  // zeros); the shift handles the power-of-two factor (exact, so no bits
  // are lost) and the odd part is inverted mod 2^BW.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // The remaining odd divisor has a multiplicative inverse modulo 2^BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Materialize the collected per-element constants in the same form as
  // the divisor operand (build_vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;
  if (UseSRA) {
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6138 
/// Given an exact UDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRL = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element pattern: factor out the power-of-two part with a logical
  // shift (exact, so no set bits are discarded), then invert the odd part.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.lshrInPlace(Shift);
      UseSRL = true;
    }
    // Calculate the multiplicative inverse modulo BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  SDValue Op1 = N->getOperand(1);

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
    return SDValue();

  // Materialize the collected constants in the same form as the divisor
  // operand (build_vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = N->getOperand(0);
  if (UseSRL) {
    Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6199 
6200 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6201                               SelectionDAG &DAG,
6202                               SmallVectorImpl<SDNode *> &Created) const {
6203   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6204   if (isIntDivCheap(N->getValueType(0), Attr))
6205     return SDValue(N, 0); // Lower SDIV as SDIV
6206   return SDValue();
6207 }
6208 
6209 SDValue
6210 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6211                               SelectionDAG &DAG,
6212                               SmallVectorImpl<SDNode *> &Created) const {
6213   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6214   if (isIntDivCheap(N->getValueType(0), Attr))
6215     return SDValue(N, 0); // Lower SREM as SREM
6216   return SDValue();
6217 }
6218 
/// Build sdiv by power-of-2 with conditional move instructions
/// Ref: "Hacker's Delight" by Henry Warren 10-1
/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
///   bgez x, label
///   add x, x, 2**k-1
/// label:
///   sra res, x, k
///   neg res, res (when the divisor is negative)
SDValue TargetLowering::buildSDIVPow2WithCMov(
    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
    SmallVectorImpl<SDNode *> &Created) const {
  // k such that |Divisor| == 2**k (callers pass a power-of-two magnitude).
  unsigned Lg2 = Divisor.countr_zero();
  EVT VT = N->getValueType(0);

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
  // This makes SRA round toward zero instead of toward negative infinity.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CMov.getNode());

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));

  // If we're dividing by a positive value, we're done.  Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
6261 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Per-element pattern: compute the magic multiplier, an optional +/-1
  // numerator correction factor, the final shift amount, and a mask used
  // to drop the sign-bit correction in the +1/-1 divisor case.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Materialize the collected constants in the same form as the divisor
  // operand (build_vector, splat, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high half of the signed product X*Y, or SDValue() if no
  // legal way to compute it exists for this type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
         isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6429 
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// \param N the UDIV node; operand 1 must be a constant (scalar, BUILD_VECTOR
///        or SPLAT_VECTOR of constants), checked via matchUnaryPredicate.
/// \param IsAfterLegalization passed through to the MULHU/UMUL_LOHI legality
///        queries below.
/// \param IsAfterLegalTypes gates the wide-multiply fallback in GetMULHU.
/// \param Created receives every node built here (callers typically add
///        these to the DAG combiner worklist).
/// \returns the replacement expression, or an empty SDValue on failure.
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Only set (and used) when VT itself is illegal but promotes to a type with
  // a legal MUL; see the check directly below and the GetMULHU lambda.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be at least twice as wide so the full 2*EltBits
    // product (and hence the high half) can be computed in it.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Per-element worker: derive the magic constants for one constant divisor
  // and append them to the vectors above. Returns false to reject the fold
  // (division by zero is left for constant folding elsewhere).
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      // Lane value is irrelevant: the final select picks N0 for these lanes.
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      // Feed the known leading zeros of the dividend into the magic-number
      // computation so a smaller multiplier (no add fixup) can be chosen.
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(
              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // NPQ lanes get 2^(EltBits-1) so that a MULHU by it acts as SRL-by-1;
      // non-NPQ lanes multiply by zero (see the UseNPQ block below).
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    // Scalar case. NPQFactor is deliberately left unset: the scalar NPQ path
    // below uses an explicit SRL-by-1 instead of a MULHU by NPQFactor.
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high half of the EltBits x EltBits unsigned product of X and
  // Y, or an empty SDValue if no suitable operation is available.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of UMUL_LOHI is the high half of the product.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
         isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
        isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // Fixup for divisors whose magic number needed the "add" variant:
  // Q = ((N - Q) >> 1) + Q, computed without overflow.
  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Lanes with divisor 1 used undef magic values above; select the original
  // numerator for them (x / 1 == x).
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6629 
6630 /// If all values in Values that *don't* match the predicate are same 'splat'
6631 /// value, then replace all values with that splat value.
6632 /// Else, if AlternativeReplacement was provided, then replace all values that
6633 /// do match predicate with AlternativeReplacement value.
6634 static void
6635 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6636                           std::function<bool(SDValue)> Predicate,
6637                           SDValue AlternativeReplacement = SDValue()) {
6638   SDValue Replacement;
6639   // Is there a value for which the Predicate does *NOT* match? What is it?
6640   auto SplatValue = llvm::find_if_not(Values, Predicate);
6641   if (SplatValue != Values.end()) {
6642     // Does Values consist only of SplatValue's and values matching Predicate?
6643     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6644           return Value == *SplatValue || Predicate(Value);
6645         })) // Then we shall replace values matching predicate with SplatValue.
6646       Replacement = *SplatValue;
6647   }
6648   if (!Replacement) {
6649     // Oops, we did not find the "baseline" splat value.
6650     if (!AlternativeReplacement)
6651       return; // Nothing to do.
6652     // Let's replace with provided value then.
6653     Replacement = AlternativeReplacement;
6654   }
6655   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6656 }
6657 
6658 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6659 /// where the divisor is constant and the comparison target is zero,
6660 /// return a DAG expression that will generate the same comparison result
6661 /// using only multiplications, additions and shifts/rotations.
6662 /// Ref: "Hacker's Delight" 10-17.
6663 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6664                                         SDValue CompTargetNode,
6665                                         ISD::CondCode Cond,
6666                                         DAGCombinerInfo &DCI,
6667                                         const SDLoc &DL) const {
6668   SmallVector<SDNode *, 5> Built;
6669   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6670                                          DCI, DL, Built)) {
6671     for (SDNode *N : Built)
6672       DCI.AddToWorklist(N);
6673     return Folded;
6674   }
6675 
6676   return SDValue();
6677 }
6678 
/// Worker for buildUREMEqFold: performs the actual rewrite and appends every
/// node it creates to \p Created so the caller can feed them to the DAG
/// combiner worklist. Returns an empty SDValue when the fold does not apply
/// or is judged unprofitable.
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Properties accumulated across all lanes by BuildUREMPattern below.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  // NOTE(review): IAmts is declared but never populated or read in this
  // function body.
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Per-lane worker: derive P, K and Q for one (divisor, comparison-target)
  // pair and record them. Returns false to reject the whole fold.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnes(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
                              /*implicitTrunc=*/true),
                        DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // When comparing with a non-zero target, rewrite (urem N, D) == C as
  // (urem (sub N, C), D) == 0 by subtracting C from the LHS first.
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops - legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above VSELECT above.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
6899 
6900 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6901 /// where the divisor is constant and the comparison target is zero,
6902 /// return a DAG expression that will generate the same comparison result
6903 /// using only multiplications, additions and shifts/rotations.
6904 /// Ref: "Hacker's Delight" 10-17.
6905 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6906                                         SDValue CompTargetNode,
6907                                         ISD::CondCode Cond,
6908                                         DAGCombinerInfo &DCI,
6909                                         const SDLoc &DL) const {
6910   SmallVector<SDNode *, 7> Built;
6911   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6912                                          DCI, DL, Built)) {
6913     assert(Built.size() <= 7 && "Max size prediction failed.");
6914     for (SDNode *N : Built)
6915       DCI.AddToWorklist(N);
6916     return Folded;
6917   }
6918 
6919   return SDValue();
6920 }
6921 
6922 SDValue
6923 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6924                                   SDValue CompTargetNode, ISD::CondCode Cond,
6925                                   DAGCombinerInfo &DCI, const SDLoc &DL,
6926                                   SmallVectorImpl<SDNode *> &Created) const {
6927   // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6928   // Fold:
6929   //   (seteq/ne (srem N, D), 0)
6930   // To:
6931   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
6932   //
6933   // - D must be constant, with D = D0 * 2^K where D0 is odd
6934   // - P is the multiplicative inverse of D0 modulo 2^W
6935   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6936   // - Q = floor((2 * A) / (2^K))
6937   // where W is the width of the common type of N and D.
6938   //
6939   // When D is a power of two (and thus D0 is 1), the normal
6940   // formula for A and Q don't apply, because the derivation
6941   // depends on D not dividing 2^(W-1), and thus theorem ZRS
6942   // does not apply. This specifically fails when N = INT_MIN.
6943   //
6944   // Instead, for power-of-two D, we use:
6945   // - A = 2^(W-1)
6946   // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6947   // - Q = 2^(W-K) - 1
6948   // |-> Test that the top K bits are zero after rotation
6949   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6950          "Only applicable for (in)equality comparisons.");
6951 
6952   SelectionDAG &DAG = DCI.DAG;
6953 
6954   EVT VT = REMNode.getValueType();
6955   EVT SVT = VT.getScalarType();
6956   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6957   EVT ShSVT = ShVT.getScalarType();
6958 
6959   // If we are after ops legalization, and MUL is unavailable, we can not
6960   // proceed.
6961   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6962     return SDValue();
6963 
6964   // TODO: Could support comparing with non-zero too.
6965   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6966   if (!CompTarget || !CompTarget->isZero())
6967     return SDValue();
6968 
6969   bool HadIntMinDivisor = false;
6970   bool HadOneDivisor = false;
6971   bool AllDivisorsAreOnes = true;
6972   bool HadEvenDivisor = false;
6973   bool NeedToApplyOffset = false;
6974   bool AllDivisorsArePowerOfTwo = true;
6975   SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6976 
6977   auto BuildSREMPattern = [&](ConstantSDNode *C) {
6978     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6979     if (C->isZero())
6980       return false;
6981 
6982     // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6983 
6984     // WARNING: this fold is only valid for positive divisors!
6985     APInt D = C->getAPIntValue();
6986     if (D.isNegative())
6987       D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`
6988 
6989     HadIntMinDivisor |= D.isMinSignedValue();
6990 
6991     // If all divisors are ones, we will prefer to avoid the fold.
6992     HadOneDivisor |= D.isOne();
6993     AllDivisorsAreOnes &= D.isOne();
6994 
6995     // Decompose D into D0 * 2^K
6996     unsigned K = D.countr_zero();
6997     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6998     APInt D0 = D.lshr(K);
6999 
7000     if (!D.isMinSignedValue()) {
7001       // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7002       // we don't care about this lane in this fold, we'll special-handle it.
7003       HadEvenDivisor |= (K != 0);
7004     }
7005 
7006     // D is a power-of-two if D0 is one. This includes INT_MIN.
7007     // If all divisors are power-of-two, we will prefer to avoid the fold.
7008     AllDivisorsArePowerOfTwo &= D0.isOne();
7009 
7010     // P = inv(D0, 2^W)
7011     // 2^W requires W + 1 bits, so we have to extend and then truncate.
7012     unsigned W = D.getBitWidth();
7013     APInt P = D0.multiplicativeInverse();
7014     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7015 
7016     // A = floor((2^(W - 1) - 1) / D0) & -2^K
7017     APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7018     A.clearLowBits(K);
7019 
7020     if (!D.isMinSignedValue()) {
7021       // If divisor INT_MIN, then we don't care about this lane in this fold,
7022       // we'll special-handle it.
7023       NeedToApplyOffset |= A != 0;
7024     }
7025 
7026     // Q = floor((2 * A) / (2^K))
7027     APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7028 
7029     assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7030            "We are expecting that A is always less than all-ones for SVT");
7031     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7032            "We are expecting that K is always less than all-ones for ShSVT");
7033 
7034     // If D was a power of two, apply the alternate constant derivation.
7035     if (D0.isOne()) {
7036       // A = 2^(W-1)
7037       A = APInt::getSignedMinValue(W);
7038       // - Q = 2^(W-K) - 1
7039       Q = APInt::getAllOnes(W - K).zext(W);
7040     }
7041 
7042     // If the divisor is 1 the result can be constant-folded. Likewise, we
7043     // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7044     if (D.isOne()) {
7045       // Set P, A and K to a bogus values so we can try to splat them.
7046       P = 0;
7047       A = -1;
7048       K = -1;
7049 
7050       // x ?% 1 == 0  <-->  true  <-->  x u<= -1
7051       Q = -1;
7052     }
7053 
7054     PAmts.push_back(DAG.getConstant(P, DL, SVT));
7055     AAmts.push_back(DAG.getConstant(A, DL, SVT));
7056     KAmts.push_back(
7057         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7058                               /*implicitTrunc=*/true),
7059                         DL, ShSVT));
7060     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7061     return true;
7062   };
7063 
7064   SDValue N = REMNode.getOperand(0);
7065   SDValue D = REMNode.getOperand(1);
7066 
7067   // Collect the values from each element.
7068   if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7069     return SDValue();
7070 
7071   // If this is a srem by a one, avoid the fold since it can be constant-folded.
7072   if (AllDivisorsAreOnes)
7073     return SDValue();
7074 
7075   // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7076   // since it can be best implemented as a bit test.
7077   if (AllDivisorsArePowerOfTwo)
7078     return SDValue();
7079 
7080   SDValue PVal, AVal, KVal, QVal;
7081   if (D.getOpcode() == ISD::BUILD_VECTOR) {
7082     if (HadOneDivisor) {
7083       // Try to turn PAmts into a splat, since we don't care about the values
7084       // that are currently '0'. If we can't, just keep '0'`s.
7085       turnVectorIntoSplatVector(PAmts, isNullConstant);
7086       // Try to turn AAmts into a splat, since we don't care about the
7087       // values that are currently '-1'. If we can't, change them to '0'`s.
7088       turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7089                                 DAG.getConstant(0, DL, SVT));
7090       // Try to turn KAmts into a splat, since we don't care about the values
7091       // that are currently '-1'. If we can't, change them to '0'`s.
7092       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7093                                 DAG.getConstant(0, DL, ShSVT));
7094     }
7095 
7096     PVal = DAG.getBuildVector(VT, DL, PAmts);
7097     AVal = DAG.getBuildVector(VT, DL, AAmts);
7098     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7099     QVal = DAG.getBuildVector(VT, DL, QAmts);
7100   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7101     assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7102            QAmts.size() == 1 &&
7103            "Expected matchUnaryPredicate to return one element for scalable "
7104            "vectors");
7105     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7106     AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7107     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7108     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7109   } else {
7110     assert(isa<ConstantSDNode>(D) && "Expected a constant");
7111     PVal = PAmts[0];
7112     AVal = AAmts[0];
7113     KVal = KAmts[0];
7114     QVal = QAmts[0];
7115   }
7116 
7117   // (mul N, P)
7118   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7119   Created.push_back(Op0.getNode());
7120 
7121   if (NeedToApplyOffset) {
7122     // We need ADD to do this.
7123     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7124       return SDValue();
7125 
7126     // (add (mul N, P), A)
7127     Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7128     Created.push_back(Op0.getNode());
7129   }
7130 
7131   // Rotate right only if any divisor was even. We avoid rotates for all-odd
7132   // divisors as a performance improvement, since rotating by 0 is a no-op.
7133   if (HadEvenDivisor) {
7134     // We need ROTR to do this.
7135     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7136       return SDValue();
7137     // SREM: (rotr (add (mul N, P), A), K)
7138     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7139     Created.push_back(Op0.getNode());
7140   }
7141 
7142   // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7143   SDValue Fold =
7144       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7145                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7146 
7147   // If we didn't have lanes with INT_MIN divisor, then we're done.
7148   if (!HadIntMinDivisor)
7149     return Fold;
7150 
7151   // That fold is only valid for positive divisors. Which effectively means,
7152   // it is invalid for INT_MIN divisors. So if we have such a lane,
7153   // we must fix-up results for said lanes.
7154   assert(VT.isVector() && "Can/should only get here for vectors.");
7155 
7156   // NOTE: we avoid letting illegal types through even if we're before legalize
7157   // ops – legalization has a hard time producing good code for the code that
7158   // follows.
7159   if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7160       !isOperationLegalOrCustom(ISD::AND, VT) ||
7161       !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7162       !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7163     return SDValue();
7164 
7165   Created.push_back(Fold.getNode());
7166 
7167   SDValue IntMin = DAG.getConstant(
7168       APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7169   SDValue IntMax = DAG.getConstant(
7170       APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7171   SDValue Zero =
7172       DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7173 
7174   // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7175   SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7176   Created.push_back(DivisorIsIntMin.getNode());
7177 
7178   // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
7179   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7180   Created.push_back(Masked.getNode());
7181   SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7182   Created.push_back(MaskedIsZero.getNode());
7183 
7184   // To produce final result we need to blend 2 vectors: 'SetCC' and
7185   // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7186   // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7187   // constant-folded, select can get lowered to a shuffle with constant mask.
7188   SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7189                                 MaskedIsZero, Fold);
7190 
7191   return Blended;
7192 }
7193 
7194 bool TargetLowering::
7195 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7196   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7197     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7198                                 "be a constant integer");
7199     return true;
7200   }
7201 
7202   return false;
7203 }
7204 
7205 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7206                                          const DenormalMode &Mode) const {
7207   SDLoc DL(Op);
7208   EVT VT = Op.getValueType();
7209   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7210   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7211 
7212   // This is specifically a check for the handling of denormal inputs, not the
7213   // result.
7214   if (Mode.Input == DenormalMode::PreserveSign ||
7215       Mode.Input == DenormalMode::PositiveZero) {
7216     // Test = X == 0.0
7217     return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7218   }
7219 
7220   // Testing it with denormal inputs to avoid wrong estimate.
7221   //
7222   // Test = fabs(X) < SmallestNormal
7223   const fltSemantics &FltSem = VT.getFltSemantics();
7224   APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7225   SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7226   SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7227   return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7228 }
7229 
/// Attempt to build the negation of \p Op, recursing into its operands up to
/// SelectionDAG::MaxRecursionDepth levels. On success, returns the negated
/// value and sets \p Cost to how that form compares with the original
/// (e.g. Cheaper or Neutral); on failure returns an empty SDValue and leaves
/// \p Cost untouched. \p LegalOps restricts folds to legal/custom operations
/// and \p OptForSize feeds the FP-immediate legality checks.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Helper to drop a speculatively-created negated node that ended up unused
  // (e.g. because the other operand's negation was chosen instead).
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    // Legal if the negated vector constant can be materialized as a whole, or
    // every (defined) lane's negated immediate is individually legal.
    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Negate each lane, passing undef lanes through unchanged.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // -(X + Y) is only (-X) - Y / (-Y) - X when signed zeros don't matter.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      // Drop the unused speculative negation of Y (unless CSE made it N).
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    // Negating either operand negates the product/quotient; pick the cheaper.
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    // The addend Z must be negatable in either fold below, so try it first.
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      // Report the better (lower) of the two classifications involved.
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // These commute with negation: fneg(op X) == op(fneg X).
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // Same, but FP_ROUND carries a second (truncation) operand to preserve.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7533 
7534 //===----------------------------------------------------------------------===//
7535 // Legalization Utilities
7536 //===----------------------------------------------------------------------===//
7537 
// Expand a MUL, UMUL_LOHI or SMUL_LOHI on type VT into operations on the
// half-width type HiLoVT, appending the result halves (low half first) to
// Result. LL/LH/RL/RH may optionally supply pre-split halves of LHS/RHS;
// either all four are provided or none. Returns false when the required
// half-width multiply forms are unavailable (per Kind) or when the operand
// halves cannot be produced.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which half-width multiply flavors may we use? MulExpansionKind::Always
  // permits everything regardless of legality.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a half-width widening multiply of L and R into Lo/Hi, preferring a
  // fused *MUL_LOHI node and falling back to separate MUL + MULH* nodes.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Split the operands into low halves if the caller didn't provide them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // *MUL_LOHI callers expect four halves; the upper two are zero here.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  // Amount used to move a value's high half into or out of position.
  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  // Derive the high halves if the caller didn't provide them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Low partial product: LL * RL.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // For plain MUL only the low OuterBitSize bits matter, so the cross
    // products only feed the high half and their own carries can be ignored.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  // First cross product: LL * RH.
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  // Second cross product: LH * RL.
  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second cross product can carry; propagate it via the glued
  // ADDC/ADDE pair when available, otherwise via UADDO_CARRY.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // High partial product: LH * RH (signed for SMUL_LOHI).
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Fold the saved carry into the high product's upper half.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // For the signed form, correct the unsigned cross products when either
    // operand's high half is negative.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
7712 
7713 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7714                                SelectionDAG &DAG, MulExpansionKind Kind,
7715                                SDValue LL, SDValue LH, SDValue RL,
7716                                SDValue RH) const {
7717   SmallVector<SDValue, 2> Result;
7718   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7719                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7720                            DAG, Kind, LL, LH, RL, RH);
7721   if (Ok) {
7722     assert(Result.size() == 2);
7723     Lo = Result[0];
7724     Hi = Result[1];
7725   }
7726   return Ok;
7727 }
7728 
7729 // Optimize unsigned division or remainder by constants for types twice as large
7730 // as a legal VT.
7731 //
7732 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7733 // can be computed
7734 // as:
7735 //   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7736 //   Remainder = Sum % Constant
7737 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7738 //
7739 // For division, we can compute the remainder using the algorithm described
7740 // above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
7742 // (1 << (BitWidth / 2)) to get the quotient.
7743 
7744 // If Constant is even, we can shift right the dividend and the divisor by the
7745 // number of trailing zeros in Constant before applying the remainder algorithm.
7746 // If we're after the quotient, we can subtract this value from the shifted
7747 // dividend and multiply by the multiplicative inverse of the shifted divisor.
7748 // If we want the remainder, we shift the value left by the number of trailing
7749 // zeros and add the bits that were shifted out of the dividend.
// N:      the UDIV/UREM/UDIVREM node to expand.
// Result: on success, receives the quotient halves (lo, hi) for UDIV/UDIVREM
//         and/or the remainder halves (lo, hi) for UREM/UDIVREM.
// HiLoVT: the half-width type used for the expansion.
// LL/LH:  optional pre-split low/high halves of the dividend; either both are
//         provided or neither.
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                            SmallVectorImpl<SDValue> &Result,
                                            EVT HiLoVT, SelectionDAG &DAG,
                                            SDValue LL, SDValue LH) const {
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);

  // TODO: Support signed division/remainder.
  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
    return false;
  assert(
      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
      "Unexpected opcode");

  // The technique only applies to division by a known constant.
  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CN)
    return false;

  APInt Divisor = CN->getAPIntValue();
  unsigned BitWidth = Divisor.getBitWidth();
  unsigned HBitWidth = BitWidth / 2;
  assert(VT.getScalarSizeInBits() == BitWidth &&
         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");

  // Divisor needs to be less than (1 << HBitWidth).
  APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
  if (Divisor.uge(HalfMaxPlus1))
    return false;

  // We depend on the UREM by constant optimization in DAGCombiner that requires
  // high multiply.
  if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
      !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
    return false;

  // Don't expand if optimizing for size.
  if (DAG.shouldOptForSize())
    return false;

  // Early out for 0 or 1 divisors.
  if (Divisor.ule(1))
    return false;

  // If the divisor is even, shift it until it becomes odd.
  unsigned TrailingZeros = 0;
  if (!Divisor[0]) {
    TrailingZeros = Divisor.countr_zero();
    Divisor.lshrInPlace(TrailingZeros);
  }

  SDLoc dl(N);
  SDValue Sum;
  SDValue PartialRem; // Dividend bits shifted out below; folded back later.

  // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
  // then add in the carry.
  // TODO: If we can't split it in half, we might be able to split into 3 or
  // more pieces using a smaller bit width.
  if (HalfMaxPlus1.urem(Divisor).isOne()) {
    assert(!LL == !LH && "Expected both input halves or no input halves!");
    if (!LL)
      std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);

    // Shift the input by the number of TrailingZeros in the divisor. The
    // shifted out bits will be added to the remainder later.
    if (TrailingZeros) {
      // Save the shifted off bits if we need the remainder.
      if (Opcode != ISD::UDIV) {
        APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
        PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
                                 DAG.getConstant(Mask, dl, HiLoVT));
      }

      // Funnel-shift the dividend right across its two halves.
      LL = DAG.getNode(
          ISD::OR, dl, HiLoVT,
          DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
                      DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
          DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
                      DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
                                                 HiLoVT, dl)));
      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
    }

    // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
    EVT SetCCType =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
    if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
      SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
      Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
      Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
                        DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
    } else {
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
      // Unsigned overflow occurred iff the sum wrapped below an addend.
      SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
      // If the boolean for the target is 0 or 1, we can add the setcc result
      // directly.
      if (getBooleanContents(HiLoVT) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
        Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
      else
        Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
                              DAG.getConstant(0, dl, HiLoVT));
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
    }
  }

  // If we didn't find a sum, we can't do the expansion.
  if (!Sum)
    return false;

  // Perform a HiLoVT urem on the Sum using truncated divisor.
  SDValue RemL =
      DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
                  DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
  SDValue RemH = DAG.getConstant(0, dl, HiLoVT);

  if (Opcode != ISD::UREM) {
    // Subtract the remainder from the shifted dividend.
    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);

    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);

    // Multiply by the multiplicative inverse of the divisor modulo
    // (1 << BitWidth).
    APInt MulFactor = Divisor.multiplicativeInverse();

    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
                                   DAG.getConstant(MulFactor, dl, VT));

    // Split the quotient into low and high parts.
    SDValue QuotL, QuotH;
    std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
    Result.push_back(QuotL);
    Result.push_back(QuotH);
  }

  if (Opcode != ISD::UDIV) {
    // If we shifted the input, shift the remainder left and add the bits we
    // shifted off the input.
    if (TrailingZeros) {
      APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
      RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
    }
    Result.push_back(RemL);
    // The remainder is smaller than the divisor, which fits in a half-width
    // value, so the high remainder half is always zero.
    Result.push_back(DAG.getConstant(0, dl, HiLoVT));
  }

  return true;
}
7903 
7904 // Check that (every element of) Z is undef or not an exact multiple of BW.
7905 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7906   return ISD::matchUnaryPredicate(
7907       Z,
7908       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7909       true);
7910 }
7911 
// Expand ISD::VP_FSHL / ISD::VP_FSHR into VP shift/logic nodes, threading the
// mask and explicit-vector-length operands through every node created so the
// predication semantics of the original node are preserved.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  // The two shifted contributions that get OR'd together at the end.
  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  // Operand 2 is the shift amount; operands 3/4 are the VP mask and EVL.
  SDValue Z = Node->getOperand(2);
  SDValue Mask = Node->getOperand(3);
  SDValue VL = Node->getOperand(4);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // Z % BW may be zero here, so the complementary shift is split into a
    // shift-by-one plus a shift by (BW - 1 - (Z % BW)) to avoid an
    // out-of-range shift by BW.
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
7968 
7969 SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7970                                           SelectionDAG &DAG) const {
7971   if (Node->isVPOpcode())
7972     return expandVPFunnelShift(Node, DAG);
7973 
7974   EVT VT = Node->getValueType(0);
7975 
7976   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7977                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
7978                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
7979                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
7980     return SDValue();
7981 
7982   SDValue X = Node->getOperand(0);
7983   SDValue Y = Node->getOperand(1);
7984   SDValue Z = Node->getOperand(2);
7985 
7986   unsigned BW = VT.getScalarSizeInBits();
7987   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7988   SDLoc DL(SDValue(Node, 0));
7989 
7990   EVT ShVT = Z.getValueType();
7991 
7992   // If a funnel shift in the other direction is more supported, use it.
7993   unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7994   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7995       isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
7996     if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7997       // fshl X, Y, Z -> fshr X, Y, -Z
7998       // fshr X, Y, Z -> fshl X, Y, -Z
7999       SDValue Zero = DAG.getConstant(0, DL, ShVT);
8000       Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8001     } else {
8002       // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8003       // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8004       SDValue One = DAG.getConstant(1, DL, ShVT);
8005       if (IsFSHL) {
8006         Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8007         X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8008       } else {
8009         X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8010         Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8011       }
8012       Z = DAG.getNOT(DL, Z, ShVT);
8013     }
8014     return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8015   }
8016 
8017   SDValue ShX, ShY;
8018   SDValue ShAmt, InvShAmt;
8019   if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8020     // fshl: X << C | Y >> (BW - C)
8021     // fshr: X << (BW - C) | Y >> C
8022     // where C = Z % BW is not zero
8023     SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8024     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8025     InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8026     ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8027     ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8028   } else {
8029     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8030     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8031     SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8032     if (isPowerOf2_32(BW)) {
8033       // Z % BW -> Z & (BW - 1)
8034       ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8035       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8036       InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8037     } else {
8038       SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8039       ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8040       InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8041     }
8042 
8043     SDValue One = DAG.getConstant(1, DL, ShVT);
8044     if (IsFSHL) {
8045       ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8046       SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8047       ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8048     } else {
8049       SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8050       ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8051       ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8052     }
8053   }
8054   return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8055 }
8056 
8057 // TODO: Merge with expandFunnelShift.
8058 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8059                                   SelectionDAG &DAG) const {
8060   EVT VT = Node->getValueType(0);
8061   unsigned EltSizeInBits = VT.getScalarSizeInBits();
8062   bool IsLeft = Node->getOpcode() == ISD::ROTL;
8063   SDValue Op0 = Node->getOperand(0);
8064   SDValue Op1 = Node->getOperand(1);
8065   SDLoc DL(SDValue(Node, 0));
8066 
8067   EVT ShVT = Op1.getValueType();
8068   SDValue Zero = DAG.getConstant(0, DL, ShVT);
8069 
8070   // If a rotate in the other direction is more supported, use it.
8071   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8072   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8073       isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8074     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8075     return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8076   }
8077 
8078   if (!AllowVectorOps && VT.isVector() &&
8079       (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8080        !isOperationLegalOrCustom(ISD::SRL, VT) ||
8081        !isOperationLegalOrCustom(ISD::SUB, VT) ||
8082        !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8083        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8084     return SDValue();
8085 
8086   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8087   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8088   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8089   SDValue ShVal;
8090   SDValue HsVal;
8091   if (isPowerOf2_32(EltSizeInBits)) {
8092     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8093     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8094     SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8095     SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8096     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8097     SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8098     HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8099   } else {
8100     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8101     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8102     SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8103     SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8104     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8105     SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8106     SDValue One = DAG.getConstant(1, DL, ShVT);
8107     HsVal =
8108         DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8109   }
8110   return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8111 }
8112 
/// Expand SHL_PARTS / SRL_PARTS / SRA_PARTS — a double-width shift of a value
/// represented as two parts (operands 0 and 1) by a shift amount (operand 2)
/// — into FSHL/FSHR plus selects. The low and high result parts are returned
/// through \p Lo and \p Hi.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 is the value that fills the "emptied" part for large shift amounts:
  // sign bits replicated across the whole part for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2: the part that receives bits funneled across the part boundary.
  // Tmp3: the part shifted entirely within one part.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  // Since VTBits is a power of two, (ShAmt & VTBits) != 0 iff the (masked)
  // amount is >= VTBits.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8164 
/// Expand FP_TO_SINT by operating on the float's bit pattern directly.
/// Currently limited to f32 -> i64 and non-strict nodes; returns false when
/// the expansion does not apply so the caller can fall back to another
/// lowering. On success the expanded value is returned through \p Result.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Strict FP nodes carry the chain as operand 0; the FP source follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants: exponent mask/position, bias,
  // sign bit, and mantissa mask.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float's bits as an integer of the same width.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent: ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign: arithmetic shift of the isolated sign bit gives all-ones for
  // negative inputs, zero otherwise; then sign-extend to the result width.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Significand with the implicit leading one (bit 23) restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand: shift left when the exponent exceeds the mantissa
  // width (23), otherwise shift right.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones and is
  // a no-op when Sign is zero.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, which truncates to zero.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8235 
/// Expand FP_TO_UINT in terms of FP_TO_SINT. Returns false when the expansion
/// is not possible; otherwise returns the value through \p Result and (for
/// strict nodes) the output chain through \p Chain.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict FP nodes carry the chain as operand 0; the FP source follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst holds the float value of the destination sign mask (2^(DstBits-1)).
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Strict nodes use a signaling compare so an invalid-operand exception is
    // still raised for NaN inputs.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // XOR with the sign mask adds 2^(DstBits-1) to the offset result.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8337 
/// Expand UINT_TO_FP without an FP-to-FP rounding step. Currently only
/// handles i64 -> f64 (non-strict); returns false when the expansion does not
/// apply. On success the converted value is returned through \p Result.
/// \p Chain is unused because strict nodes are rejected up front.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  // If the input is known to be non-negative and SINT_TO_FP is legal then use
  // it.
  if (Node->getFlags().hasNonNeg() &&
      isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
    Result =
        DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
    return true;
  }

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit
  // operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt.  This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will
  // produce -0.0. This will be added to +0.0 and produce -0.0 which is
  // incorrect.
  // The integer constants below are the bit patterns of 2^52 and 2^84 as f64;
  // OR-ing the 32-bit halves into those exponents makes the halves' values
  // directly readable out of the doubles.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  // Split the input into 32-bit halves.
  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  // Lo as a double: 2^52 + Lo (exact); Hi as a double: 2^84 + Hi * 2^32.
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  // Subtract the combined biases (2^84 + 2^52), then add the two halves.
  SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
8397 
8398 SDValue
8399 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8400                                                SelectionDAG &DAG) const {
8401   unsigned Opcode = Node->getOpcode();
8402   assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8403           Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8404          "Wrong opcode");
8405 
8406   if (Node->getFlags().hasNoNaNs()) {
8407     ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8408     EVT VT = Node->getValueType(0);
8409     if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8410          !isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
8411         VT.isVector())
8412       return SDValue();
8413     SDValue Op1 = Node->getOperand(0);
8414     SDValue Op2 = Node->getOperand(1);
8415     SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8416     // Copy FMF flags, but always set the no-signed-zeros flag
8417     // as this is implied by the FMINNUM/FMAXNUM semantics.
8418     SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
8419     return SelCC;
8420   }
8421 
8422   return SDValue();
8423 }
8424 
/// Expand FMINNUM/FMAXNUM by trying, in order: splitting the vector op, the
/// IEEE variants (with sNaN quieting), FMINIMUM/FMAXIMUM when NaNs and
/// conflicting zeros are ruled out, and finally a compare+select. Returns an
/// empty SDValue if none of the strategies apply.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(0);
  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT))
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
  }

  // Last resort: compare+select (only valid with the no-NaNs flag; see the
  // helper).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8480 
/// Expand FMINIMUM/FMAXIMUM (IEEE-754 2019 semantics: NaN propagates, and
/// -0.0 orders below +0.0) from an ordered min/max core plus explicit NaN
/// and signed-zero fixups.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    // The IEEE variant is trusted to order -0.0 < +0.0, so the zero fixup
    // below can be skipped.
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
  } else {
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
                                        APFloat::getNaN(VT.getFltSemantics()));
    // SETUO is true iff either operand is NaN; in that case return NaN.
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
    // Only when the result compares equal to 0.0 can the two zero signs have
    // been conflated; pick the operand whose zero sign wins for this opcode
    // (+0.0 for max, -0.0 for min).
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
  }

  return MinMax;
}
8547 
/// Expand ISD::FMINIMUMNUM / ISD::FMAXIMUMNUM (IEEE 754-2019 minimumNumber /
/// maximumNumber: NaN operands lose to numbers, sNaN is quieted, and -0.0 is
/// ordered less than +0.0).
///
/// Tries progressively cheaper lowerings: FMINNUM_IEEE/FMAXNUM_IEEE with
/// operand canonicalization, then FMINIMUM/FMAXIMUM when no NaNs can occur,
/// then FMINNUM/FMAXNUM when sNaN and signed-zero concerns are absent, and
/// finally a generic select-based expansion with explicit NaN and signed-zero
/// fixups.
///
/// \param Node the FMINIMUMNUM/FMAXIMUMNUM node to expand.
/// \param DAG  the SelectionDAG to build replacement nodes in.
/// \returns the expanded value.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = Node->getFlags();

  // FMINNUM_IEEE/FMAXNUM_IEEE only differ from FMINIMUMNUM/FMAXIMUMNUM in
  // their sNaN handling, which canonicalization below papers over.
  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(LHS)) {
        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(RHS)) {
        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
      }
    }

    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
  // same behaviors for all of other cases: +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2019Op, VT))
      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
  }

  // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
       DAG.isKnownNeverZeroFloat(RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(IEEE2008Op, VT))
      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
  }

  // The select-based expansion below needs VSELECT; without it, fall back to
  // scalarizing the vector operation.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // If only one operand is NaN, override it with another operand.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
  }

  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
  // If MinMax is NaN, let's quiet it.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
      !DAG.isKnownNeverNaN(RHS)) {
    MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
  }

  // Fixup signed zero behavior.
  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
    return MinMax;
  }
  // When the compare result is +/-0.0, the operands may be (+0.0, -0.0) in
  // either order and the compare picked arbitrarily. Prefer the operand that
  // is the "winning" zero for this operation (+0.0 for max, -0.0 for min).
  SDValue TestZero =
      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
  SDValue LCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
      MinMax, Flags);
  SDValue RCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
      Flags);
  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
}
8635 
8636 /// Returns a true value if if this FPClassTest can be performed with an ordered
8637 /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8638 /// std::nullopt if it cannot be performed as a compare with 0.
8639 static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8640                                            const fltSemantics &Semantics,
8641                                            const MachineFunction &MF) {
8642   FPClassTest OrderedMask = Test & ~fcNan;
8643   FPClassTest NanTest = Test & fcNan;
8644   bool IsOrdered = NanTest == fcNone;
8645   bool IsUnordered = NanTest == fcNan;
8646 
8647   // Skip cases that are testing for only a qnan or snan.
8648   if (!IsOrdered && !IsUnordered)
8649     return std::nullopt;
8650 
8651   if (OrderedMask == fcZero &&
8652       MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8653     return IsOrdered;
8654   if (OrderedMask == (fcZero | fcSubnormal) &&
8655       MF.getDenormalMode(Semantics).inputsAreZero())
8656     return IsOrdered;
8657   return std::nullopt;
8658 }
8659 
/// Expand an ISD::IS_FPCLASS test into target-legal operations.
///
/// First attempts floating-point compare based lowerings (only valid when FP
/// exceptions may be ignored), then falls back to a fully general expansion
/// that bitcasts the operand to an integer and tests sign/exponent/mantissa
/// fields directly.
///
/// \param OrigTestMask the set of FP classes to test for.
/// \param Flags        node flags; hasNoFPExcept() gates the fcmp lowerings.
/// \returns a boolean (of type \p ResultVT) that is true iff \p Op belongs to
///          one of the requested classes.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x87 extended precision has an explicit integer bit, which needs special
  // handling in several places below.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    // Testing the complement set may take fewer operations; if so, test it and
    // invert the compare predicates instead.
    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(FPTestMask, true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                             : UnorderedCmpOpcode,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
        (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
         (OperandVT.isVector() &&
          isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
                                          : UnorderedCmpOpcode,
                                OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
          OperandVT);
      return DAG.getSetCC(DL, ResultVT, Op, Inf,
                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                  OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
                            IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(IsFiniteOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(IsNormalOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // Accumulates the OR of the partial class tests computed below.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  // Lazily materialize the f80 explicit-integer-bit test so it is only built
  // (and built once) when some class test needs it.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No partial results means the (possibly inverted) test matched nothing.
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
9007 
9008 // Only expand vector types if we have the appropriate vector bit operations.
9009 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9010   assert(VT.isVector() && "Expected vector type");
9011   unsigned Len = VT.getScalarSizeInBits();
9012   return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9013          TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9014          TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9015          (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9016          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9017 }
9018 
/// Expand ISD::CTPOP into a sequence of shifts, masks, adds and (optionally) a
/// multiply, using the classic parallel bit-counting algorithm.
///
/// \param Node the CTPOP node to expand; its type must be an integer with a
///             byte-multiple width of at most 128 bits.
/// \returns the expanded value, or SDValue() if the type cannot be handled.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  // After this step every 2-bit field holds the popcount of those two bits.
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  // Now every 4-bit field holds the popcount of those four bits.
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  // Now every byte holds its own popcount (<= 8, so no overflow across bytes).
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                     DAG.getNode(ISD::ADD, dl, VT, Op,
                                 DAG.getNode(ISD::SRL, dl, VT, Op,
                                             DAG.getConstant(8, dl, ShVT))),
                     DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all the per-byte counts into the top byte.
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    // No multiply available: emulate v * 0x0101... with shift-and-add.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
9095 
9096 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9097   SDLoc dl(Node);
9098   EVT VT = Node->getValueType(0);
9099   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9100   SDValue Op = Node->getOperand(0);
9101   SDValue Mask = Node->getOperand(1);
9102   SDValue VL = Node->getOperand(2);
9103   unsigned Len = VT.getScalarSizeInBits();
9104   assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9105 
9106   // TODO: Add support for irregular type lengths.
9107   if (!(Len <= 128 && Len % 8 == 0))
9108     return SDValue();
9109 
9110   // This is same algorithm of expandCTPOP from
9111   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9112   SDValue Mask55 =
9113       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9114   SDValue Mask33 =
9115       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9116   SDValue Mask0F =
9117       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9118 
9119   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9120 
9121   // v = v - ((v >> 1) & 0x55555555...)
9122   Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9123                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9124                                  DAG.getConstant(1, dl, ShVT), Mask, VL),
9125                      Mask55, Mask, VL);
9126   Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9127 
9128   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9129   Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9130   Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9131                      DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9132                                  DAG.getConstant(2, dl, ShVT), Mask, VL),
9133                      Mask33, Mask, VL);
9134   Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9135 
9136   // v = (v + (v >> 4)) & 0x0F0F0F0F...
9137   Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9138                      Mask, VL),
9139   Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9140   Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9141 
9142   if (Len <= 8)
9143     return Op;
9144 
9145   // v = (v * 0x01010101...) >> (Len - 8)
9146   SDValue V;
9147   if (isOperationLegalOrCustomOrPromote(
9148           ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9149     SDValue Mask01 =
9150         DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9151     V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9152   } else {
9153     V = Op;
9154     for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9155       SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9156       V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9157                       DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9158                       Mask, VL);
9159     }
9160   }
9161   return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9162                      Mask, VL);
9163 }
9164 
9165 SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9166   SDLoc dl(Node);
9167   EVT VT = Node->getValueType(0);
9168   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9169   SDValue Op = Node->getOperand(0);
9170   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9171 
9172   // If the non-ZERO_UNDEF version is supported we can use that instead.
9173   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9174       isOperationLegalOrCustom(ISD::CTLZ, VT))
9175     return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9176 
9177   // If the ZERO_UNDEF version is supported use that and handle the zero case.
9178   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
9179     EVT SetCCVT =
9180         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9181     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9182     SDValue Zero = DAG.getConstant(0, dl, VT);
9183     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9184     return DAG.getSelect(dl, VT, SrcIsZero,
9185                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9186   }
9187 
9188   // Only expand vector types if we have the appropriate vector bit operations.
9189   // This includes the operations needed to expand CTPOP if it isn't supported.
9190   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9191                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9192                          !canExpandVectorCTPOP(*this, VT)) ||
9193                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
9194                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
9195     return SDValue();
9196 
9197   // for now, we do this:
9198   // x = x | (x >> 1);
9199   // x = x | (x >> 2);
9200   // ...
9201   // x = x | (x >>16);
9202   // x = x | (x >>32); // for 64-bit input
9203   // return popcount(~x);
9204   //
9205   // Ref: "Hacker's Delight" by Henry Warren
9206   for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9207     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9208     Op = DAG.getNode(ISD::OR, dl, VT, Op,
9209                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9210   }
9211   Op = DAG.getNOT(dl, Op, VT);
9212   return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9213 }
9214 
9215 SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9216   SDLoc dl(Node);
9217   EVT VT = Node->getValueType(0);
9218   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9219   SDValue Op = Node->getOperand(0);
9220   SDValue Mask = Node->getOperand(1);
9221   SDValue VL = Node->getOperand(2);
9222   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9223 
9224   // do this:
9225   // x = x | (x >> 1);
9226   // x = x | (x >> 2);
9227   // ...
9228   // x = x | (x >>16);
9229   // x = x | (x >>32); // for 64-bit input
9230   // return popcount(~x);
9231   for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9232     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9233     Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9234                      DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9235                      VL);
9236   }
9237   Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9238                    Mask, VL);
9239   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9240 }
9241 
/// Expand CTTZ via a De Bruijn sequence multiply and a constant-pool table
/// lookup: isolate the lowest set bit with (x & -x), multiply by a De Bruijn
/// constant so the top bits form a unique index, and load the answer from a
/// precomputed byte table.
///
/// \param BitWidth must be 32 or 64; other widths are rejected.
/// \returns the expanded value, or SDValue() for unsupported widths. For
///          plain CTTZ (not ZERO_UNDEF) a select patches the zero input to
///          return \p BitWidth.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  // Standard De Bruijn constants: every window of log2(BitWidth) top bits of
  // DeBruijn << i is distinct for i in [0, BitWidth).
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // ((x & -x) * DeBruijn) >> ShiftAmt yields a unique table index per
  // trailing-zero count.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse mapping: index produced by bit i -> i.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  // ZERO_UNDEF may return garbage for zero input, so the table result is fine
  // as-is (index 0 maps to 0 there).
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9285 
/// Expand CTTZ/CTTZ_ZERO_UNDEF into operations the target supports, trying
/// progressively cheaper strategies: reuse the sibling CTTZ opcode, a table
/// lookup (scalars), or the popcount/ctlz identity on ~x & (x - 1).
/// Returns SDValue() if no suitable expansion exists for a vector type.
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // cttz(0) is defined as the element bit width.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
      !isOperationLegal(ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
9341 
9342 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9343   SDValue Op = Node->getOperand(0);
9344   SDValue Mask = Node->getOperand(1);
9345   SDValue VL = Node->getOperand(2);
9346   SDLoc dl(Node);
9347   EVT VT = Node->getValueType(0);
9348 
9349   // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9350   SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9351                             DAG.getAllOnesConstant(dl, VT), Mask, VL);
9352   SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9353                                  DAG.getConstant(1, dl, VT), Mask, VL);
9354   SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9355   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9356 }
9357 
/// Expand VP_CTTZ_ELTS: count the number of leading "false" elements in a
/// predicate-like vector, i.e. the index of the first set element (or EVL if
/// none is set within the active length).
SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
                                             SelectionDAG &DAG) const {
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
  SDLoc DL(N);
  SDValue Source = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(0);
  // Vector of the scalar result type, one lane per source element, used for
  // the step vector / splat / select below.
  EVT ResVecVT =
      EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());

  // Convert to boolean vector.
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
    // Note: SrcVT is rebound to the i1 vector type before building the setcc.
    SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                             SrcVT.getVectorElementCount());
    Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
                         DAG.getCondCode(ISD::SETNE), Mask, EVL);
  }

  // Active lanes with a set element contribute their index (step vector);
  // all other lanes contribute EVL, so the umin reduction yields the first
  // set index, or EVL when no element is set.
  SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
  SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
  SDValue Select =
      DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
  // ExtEVL doubles as the reduction's start value, capping the result at EVL.
  return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
}
9390 
9391 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9392                                   bool IsNegative) const {
9393   SDLoc dl(N);
9394   EVT VT = N->getValueType(0);
9395   SDValue Op = N->getOperand(0);
9396 
9397   // abs(x) -> smax(x,sub(0,x))
9398   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9399       isOperationLegal(ISD::SMAX, VT)) {
9400     SDValue Zero = DAG.getConstant(0, dl, VT);
9401     Op = DAG.getFreeze(Op);
9402     return DAG.getNode(ISD::SMAX, dl, VT, Op,
9403                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9404   }
9405 
9406   // abs(x) -> umin(x,sub(0,x))
9407   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9408       isOperationLegal(ISD::UMIN, VT)) {
9409     SDValue Zero = DAG.getConstant(0, dl, VT);
9410     Op = DAG.getFreeze(Op);
9411     return DAG.getNode(ISD::UMIN, dl, VT, Op,
9412                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9413   }
9414 
9415   // 0 - abs(x) -> smin(x, sub(0,x))
9416   if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9417       isOperationLegal(ISD::SMIN, VT)) {
9418     SDValue Zero = DAG.getConstant(0, dl, VT);
9419     Op = DAG.getFreeze(Op);
9420     return DAG.getNode(ISD::SMIN, dl, VT, Op,
9421                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9422   }
9423 
9424   // Only expand vector types if we have the appropriate vector operations.
9425   if (VT.isVector() &&
9426       (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9427        (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9428        (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9429        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9430     return SDValue();
9431 
9432   Op = DAG.getFreeze(Op);
9433   SDValue Shift = DAG.getNode(
9434       ISD::SRA, dl, VT, Op,
9435       DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9436   SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9437 
9438   // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9439   if (!IsNegative)
9440     return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9441 
9442   // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9443   return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9444 }
9445 
/// Expand ABDS/ABDU (absolute difference) into supported operations, trying
/// several strategies in decreasing order of quality: max-min, saturating
/// subtract, plain abs(sub) when the subtract provably cannot overflow,
/// branchless setcc-based forms, and finally a select (or scalarization for
/// vectors without VSELECT).
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // Freeze both operands: each is used multiple times below and must observe
  // a consistent value if poison/undef.
  SDValue LHS = DAG.getFreeze(N->getOperand(0));
  SDValue RHS = DAG.getFreeze(N->getOperand(1));
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
    return DAG.getNode(ISD::OR, dl, VT,
                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));

  // If the subtract doesn't overflow then just use abs(sub())
  // NOTE: don't use frozen operands for value tracking.
  bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
                       DAG.SignBitIsZero(N->getOperand(0));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
                             N->getOperand(1)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));

  if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
                             N->getOperand(0)))
    return DAG.getNode(ISD::ABS, dl, VT,
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));

  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
  }

  // Similar to the branchless expansion, use the (sign-extended) usubo overflow
  // flag if the (scalar) type is illegal as this is more likely to legalize
  // cleanly:
  // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue USubO =
        DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
    SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
}
9519 
/// Expand AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU (rounding averages) without
/// losing the carry bit of the intermediate addition. Tries add+shift when
/// the operands are known to have a spare top bit, a widened add+shift for
/// scalars with a legal double-width type, a uaddo-based form for illegal
/// scalar types, and finally the carry-free Hacker's-Delight identity.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  // Opcode selections for the final carry-free expansion (see bottom).
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // (2 sign bits / 1 leading zero means LHS+RHS cannot overflow VT.)
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
       DAG.ComputeNumSignBits(RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
    // Ceiling variants round up by adding 1 before the shift.
    if (!IsFloor)
      Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
    return DAG.getNode(ShiftOpc, dl, VT, Sum,
                       DAG.getShiftAmountConstant(1, VT, dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
    if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
      if (!IsFloor)
        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
                          DAG.getConstant(1, dl, ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
                        DAG.getShiftAmountConstant(1, ExtVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue UAddWithOverflow =
        DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(0);
    SDValue Overflow = UAddWithOverflow.getValue(1);

    // Right shift the sum by 1
    SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
                                  DAG.getShiftAmountConstant(1, VT, dl));

    // The overflow bit becomes the (shifted-in) top bit of the result.
    SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
    SDValue OverflowShl = DAG.getNode(
        ISD::SHL, dl, VT, ZeroExtOverflow,
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));

    return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Freeze: each operand feeds two nodes and must observe a consistent value.
  LHS = DAG.getFreeze(LHS);
  RHS = DAG.getFreeze(RHS);
  SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
  SDValue Shift =
      DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
  return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
}
9601 
/// Expand BSWAP into shift/and/or sequences for i16/i32/i64 scalar element
/// types (a rotate suffices for i16). Returns SDValue() for any other type.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // TmpN holds byte N of the result (1-based, from LSB); mask each source
    // byte into place, then OR the pieces together as a balanced tree.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same scheme for 8 bytes: mask/shift each byte into its mirrored
    // position, then reduce with a tree of ORs.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
9659 
/// Expand VP_BSWAP into predicated shift/and/or sequences for i16/i32/i64
/// scalar element types; identical structure to expandBSWAP but every node
/// carries the mask and EVL operands. Returns SDValue() for other types.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // No predicated ROTL, so use shl | srl instead of the rotate used by
    // the non-VP expansion.
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    // TmpN holds byte N of the result (1-based, from LSB).
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    // Mask/shift each of the 8 bytes into its mirrored position, then OR the
    // pieces together as a balanced tree.
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
9735 
/// Expand BITREVERSE. For power-of-2 sizes >= 8 bits: byte-swap first, then
/// swap nibbles, bit-pairs, and adjacent bits with splatted masks (log2(Sz)
/// steps). Otherwise fall back to moving every bit individually (Sz steps).
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Fallback for odd sizes: move bit I to position J = Sz-1-I, one bit at a
  // time, ORing the isolated bits into the accumulator.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    APInt Shift = APInt::getOneBitSet(Sz, J);
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
9796 
/// Expand VP_BITREVERSE using the same bswap + masked-swap network as
/// expandBITREVERSE, but with predicated (mask + EVL) opcodes. Unlike the
/// non-VP version there is no per-bit fallback: non-power-of-2 or sub-byte
/// sizes return SDValue().
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    return Tmp;
  }
  return SDValue();
}
9858 
/// Turn a vector load into scalar operations, returning {value, chain}.
/// Byte-sized elements are loaded one by one as extending loads; elements
/// that are not byte-sized are recovered from a single integer load of the
/// whole vector via shift/mask, since vectors are stored in memory without
/// padding between elements. Aborts on scalable vectors.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    // Extract each element: shift its bits down to position 0, mask, then
    // truncate (and extend if the original load was extending).
    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 occupies the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one (possibly extending) scalar load per
  // element at consecutive offsets.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge the per-element chains so later users depend on all loads.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
9948 
/// Scalarize the store of a vector value. Byte-sized elements are stored one
/// at a time as truncating scalar stores; sub-byte elements are packed into a
/// single integer (zero-extended and shifted into position) that is stored as
/// a whole, so the in-memory layout has no padding between elements.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // Scalable vectors have no fixed element count to iterate over.
  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // OR each element (truncated to the memory scalar type, then
    // zero-extended to the packed integer type) into its bit position.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // Store the packed integer, reusing the original store's memory operands.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // Join the per-element store chains; their relative order doesn't matter.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
10025 
/// Expand a load whose alignment the target cannot handle directly.
/// FP/vector loads are scalarized, turned into an integer load + bitcast, or
/// staged register-chunk-wise through an aligned stack slot; scalar integer
/// loads are split into two half-width loads that are shifted and OR'd back
/// together. Returns the loaded value and the new chain.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    // An integer type with the same total width as the loaded memory type.
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the loaded bytes.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low half is always zero-extended so
  // that OR'ing the shifted high half back in reproduces the full value;
  // endianness decides which half lives at the lower address.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: Result = (Hi << NumBits) | Lo
  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
10175 
/// Expand a store whose alignment the target cannot handle directly.
/// FP/vector stores are scalarized, bitcast to an integer store of the same
/// size, or staged through an aligned stack slot; scalar integer stores are
/// split into two half-width stores. Returns the new chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    // An integer type with the same total width as the stored value.
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the stored bytes.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts; endianness decides which half goes at the lower
  // address.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
10308 
10309 SDValue
10310 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10311                                        const SDLoc &DL, EVT DataVT,
10312                                        SelectionDAG &DAG,
10313                                        bool IsCompressedMemory) const {
10314   SDValue Increment;
10315   EVT AddrVT = Addr.getValueType();
10316   EVT MaskVT = Mask.getValueType();
10317   assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10318          "Incompatible types of Data and Mask");
10319   if (IsCompressedMemory) {
10320     if (DataVT.isScalableVector())
10321       report_fatal_error(
10322           "Cannot currently handle compressed memory with scalable vectors");
10323     // Incrementing the pointer according to number of '1's in the mask.
10324     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10325     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10326     if (MaskIntVT.getSizeInBits() < 32) {
10327       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10328       MaskIntVT = MVT::i32;
10329     }
10330 
10331     // Count '1's with POPCNT.
10332     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10333     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10334     // Scale is an element size in bytes.
10335     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10336                                     AddrVT);
10337     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10338   } else if (DataVT.isScalableVector()) {
10339     Increment = DAG.getVScale(DL, AddrVT,
10340                               APInt(AddrVT.getFixedSizeInBits(),
10341                                     DataVT.getStoreSize().getKnownMinValue()));
10342   } else
10343     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10344 
10345   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10346 }
10347 
10348 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10349                                        EVT VecVT, const SDLoc &dl,
10350                                        ElementCount SubEC) {
10351   assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10352          "Cannot index a scalable vector within a fixed-width vector");
10353 
10354   unsigned NElts = VecVT.getVectorMinNumElements();
10355   unsigned NumSubElts = SubEC.getKnownMinValue();
10356   EVT IdxVT = Idx.getValueType();
10357 
10358   if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10359     // If this is a constant index and we know the value plus the number of the
10360     // elements in the subvector minus one is less than the minimum number of
10361     // elements then it's safe to return Idx.
10362     if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10363       if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10364         return Idx;
10365     SDValue VS =
10366         DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10367     unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10368     SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10369                               DAG.getConstant(NumSubElts, dl, IdxVT));
10370     return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10371   }
10372   if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10373     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10374     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10375                        DAG.getConstant(Imm, dl, IdxVT));
10376   }
10377   unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10378   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10379                      DAG.getConstant(MaxIndex, dl, IdxVT));
10380 }
10381 
10382 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10383                                                 SDValue VecPtr, EVT VecVT,
10384                                                 SDValue Index) const {
10385   return getVectorSubVecPointer(
10386       DAG, VecPtr, VecVT,
10387       EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10388       Index);
10389 }
10390 
10391 SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10392                                                SDValue VecPtr, EVT VecVT,
10393                                                EVT SubVecVT,
10394                                                SDValue Index) const {
10395   SDLoc dl(Index);
10396   // Make sure the index type is big enough to compute in.
10397   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10398 
10399   EVT EltVT = VecVT.getVectorElementType();
10400 
10401   // Calculate the element offset and add it to the pointer.
10402   unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10403   assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10404          "Converting bits to bytes lost precision");
10405   assert(SubVecVT.getVectorElementType() == EltVT &&
10406          "Sub-vector must be a vector with matching element type");
10407   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10408                                   SubVecVT.getVectorElementCount());
10409 
10410   EVT IdxVT = Index.getValueType();
10411   if (SubVecVT.isScalableVector())
10412     Index =
10413         DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10414                     DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10415 
10416   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10417                       DAG.getConstant(EltSize, dl, IdxVT));
10418   return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10419 }
10420 
10421 //===----------------------------------------------------------------------===//
10422 // Implementation of Emulated TLS Model
10423 //===----------------------------------------------------------------------===//
10424 
/// Lower access to a thread-local global under the emulated TLS model by
/// calling the __emutls_get_address runtime helper.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  // Look through casts/aliases so the control-variable name is derived from
  // the underlying global.
  const GlobalValue *GV =
      cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  // The "__emutls_v.<name>" control variable must already exist in the module.
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  // The helper takes the control variable's address as its single argument.
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10464 
10465 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10466                                                 SelectionDAG &DAG) const {
10467   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10468   if (!isCtlzFast())
10469     return SDValue();
10470   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10471   SDLoc dl(Op);
10472   if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10473     EVT VT = Op.getOperand(0).getValueType();
10474     SDValue Zext = Op.getOperand(0);
10475     if (VT.bitsLT(MVT::i32)) {
10476       VT = MVT::i32;
10477       Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10478     }
10479     unsigned Log2b = Log2_32(VT.getSizeInBits());
10480     SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10481     SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10482                               DAG.getConstant(Log2b, dl, MVT::i32));
10483     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10484   }
10485   return SDValue();
10486 }
10487 
/// Expand integer [US]MIN/[US]MAX nodes, trying cheaper arithmetic forms
/// first and reusing an already-existing SETCC node where possible before
/// falling back to a fresh SETCC + SELECT pair.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    // Op0 is used twice (compare and subtract), so freeze it to keep both
    // uses consistent.
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    // Non-commuted forms: an existing compare lets us select (Op0, Op1)
    // directly.
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    // Commuted forms: the same compare works if we swap the select operands.
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    // No reusable SETCC found: emit the preferred compare fresh.
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10568 
10569 SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
10570   unsigned Opcode = Node->getOpcode();
10571   SDValue LHS = Node->getOperand(0);
10572   SDValue RHS = Node->getOperand(1);
10573   EVT VT = LHS.getValueType();
10574   SDLoc dl(Node);
10575 
10576   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10577   assert(VT.isInteger() && "Expected operands to be integers");
10578 
10579   // usub.sat(a, b) -> umax(a, b) - b
10580   if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10581     SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10582     return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10583   }
10584 
10585   // uadd.sat(a, b) -> umin(a, ~b) + b
10586   if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10587     SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10588     SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10589     return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10590   }
10591 
10592   unsigned OverflowOp;
10593   switch (Opcode) {
10594   case ISD::SADDSAT:
10595     OverflowOp = ISD::SADDO;
10596     break;
10597   case ISD::UADDSAT:
10598     OverflowOp = ISD::UADDO;
10599     break;
10600   case ISD::SSUBSAT:
10601     OverflowOp = ISD::SSUBO;
10602     break;
10603   case ISD::USUBSAT:
10604     OverflowOp = ISD::USUBO;
10605     break;
10606   default:
10607     llvm_unreachable("Expected method to receive signed or unsigned saturation "
10608                      "addition or subtraction node.");
10609   }
10610 
10611   // FIXME: Should really try to split the vector in case it's legal on a
10612   // subvector.
10613   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10614     return DAG.UnrollVectorOp(Node);
10615 
10616   unsigned BitWidth = LHS.getScalarValueSizeInBits();
10617   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10618   SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10619   SDValue SumDiff = Result.getValue(0);
10620   SDValue Overflow = Result.getValue(1);
10621   SDValue Zero = DAG.getConstant(0, dl, VT);
10622   SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10623 
10624   if (Opcode == ISD::UADDSAT) {
10625     if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10626       // (LHS + RHS) | OverflowMask
10627       SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10628       return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10629     }
10630     // Overflow ? 0xffff.... : (LHS + RHS)
10631     return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10632   }
10633 
10634   if (Opcode == ISD::USUBSAT) {
10635     if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10636       // (LHS - RHS) & ~OverflowMask
10637       SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10638       SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10639       return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10640     }
10641     // Overflow ? 0 : (LHS - RHS)
10642     return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10643   }
10644 
10645   if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10646     APInt MinVal = APInt::getSignedMinValue(BitWidth);
10647     APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
10648 
10649     KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10650     KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10651 
10652     // If either of the operand signs are known, then they are guaranteed to
10653     // only saturate in one direction. If non-negative they will saturate
10654     // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10655     //
10656     // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10657     // sign of 'y' has to be flipped.
10658 
10659     bool LHSIsNonNegative = KnownLHS.isNonNegative();
10660     bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10661                                                    : KnownRHS.isNegative();
10662     if (LHSIsNonNegative || RHSIsNonNegative) {
10663       SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10664       return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10665     }
10666 
10667     bool LHSIsNegative = KnownLHS.isNegative();
10668     bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10669                                                 : KnownRHS.isNonNegative();
10670     if (LHSIsNegative || RHSIsNegative) {
10671       SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10672       return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10673     }
10674   }
10675 
10676   // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10677   APInt MinVal = APInt::getSignedMinValue(BitWidth);
10678   SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10679   SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10680                               DAG.getConstant(BitWidth - 1, dl, VT));
10681   Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10682   return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10683 }
10684 
10685 SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10686   unsigned Opcode = Node->getOpcode();
10687   SDValue LHS = Node->getOperand(0);
10688   SDValue RHS = Node->getOperand(1);
10689   EVT VT = LHS.getValueType();
10690   EVT ResVT = Node->getValueType(0);
10691   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10692   SDLoc dl(Node);
10693 
10694   auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10695   auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10696   SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10697   SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10698 
10699   // We can't perform arithmetic on i1 values. Extending them would
10700   // probably result in worse codegen, so let's just use two selects instead.
10701   // Some targets are also just better off using selects rather than subtraction
10702   // because one of the conditions can be merged with one of the selects.
10703   // And finally, if we don't know the contents of high bits of a boolean value
10704   // we can't perform any arithmetic either.
10705   if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10706       getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10707     SDValue SelectZeroOrOne =
10708         DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10709                       DAG.getConstant(0, dl, ResVT));
10710     return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10711                          SelectZeroOrOne);
10712   }
10713 
10714   if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10715     std::swap(IsGT, IsLT);
10716   return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10717                             ResVT);
10718 }
10719 
10720 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10721   unsigned Opcode = Node->getOpcode();
10722   bool IsSigned = Opcode == ISD::SSHLSAT;
10723   SDValue LHS = Node->getOperand(0);
10724   SDValue RHS = Node->getOperand(1);
10725   EVT VT = LHS.getValueType();
10726   SDLoc dl(Node);
10727 
10728   assert((Node->getOpcode() == ISD::SSHLSAT ||
10729           Node->getOpcode() == ISD::USHLSAT) &&
10730           "Expected a SHLSAT opcode");
10731   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10732   assert(VT.isInteger() && "Expected operands to be integers");
10733 
10734   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10735     return DAG.UnrollVectorOp(Node);
10736 
10737   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10738 
10739   unsigned BW = VT.getScalarSizeInBits();
10740   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10741   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10742   SDValue Orig =
10743       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10744 
10745   SDValue SatVal;
10746   if (IsSigned) {
10747     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10748     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10749     SDValue Cond =
10750         DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10751     SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10752   } else {
10753     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10754   }
10755   SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10756   return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10757 }
10758 
/// Compute the low (\p Lo) and high (\p Hi) halves of the product of the
/// double-width values whose halves are (\p LL, \p LH) and (\p RL, \p RH),
/// either via a MUL libcall on \p WideVT or, failing that, via an inline
/// schoolbook (half-word) expansion.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, EVT WideVT,
                                        const SDValue LL, const SDValue LH,
                                        const SDValue RL, const SDValue RH,
                                        SDValue &Lo, SDValue &Hi) const {
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  // Also, we can fall back to a division in some cases, but that's a big
  // performance hit in the general case.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    // We'll expand the multiplication by brute force because we have no other
    // options. This is a trivially-generalized version of the code from
    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
    // 4.3.1).
    EVT VT = LL.getValueType();
    unsigned Bits = VT.getSizeInBits();
    unsigned HalfBits = Bits >> 1;
    // Mask selecting the low half of a word.
    SDValue Mask =
        DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
    SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
    SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);

    // T = low(LL) * low(RL): least-significant partial product.
    SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
    SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);

    // High halves of T, LL and RL.
    SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
    SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
    SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
    SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);

    // U = high(LL) * low(RL) plus the carry from T.
    SDValue U = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
    SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
    SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);

    // V = low(LL) * high(RL) plus the low half of U.
    SDValue V = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
    SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);

    // W = high(LL) * high(RL) plus the carries collected in UH and VH.
    SDValue W =
        DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
                    DAG.getNode(ISD::ADD, dl, VT, UH, VH));
    // Low word of the result: TL with V shifted into the upper half.
    Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
                     DAG.getNode(ISD::SHL, dl, VT, V, Shift));

    // High word: W plus the cross products involving the high inputs LH/RH.
    Hi = DAG.getNode(ISD::ADD, dl, VT, W,
                     DAG.getNode(ISD::ADD, dl, VT,
                                 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
                                 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
  } else {
    // Attempt a libcall.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(Signed);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = {LL, LH, RL, RH};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = {LH, LL, RH, RL};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      Lo = Ret.getOperand(0);
      Hi = Ret.getOperand(1);
    } else {
      Lo = Ret.getOperand(1);
      Hi = Ret.getOperand(0);
    }
  }
}
10847 
10848 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10849                                         bool Signed, const SDValue LHS,
10850                                         const SDValue RHS, SDValue &Lo,
10851                                         SDValue &Hi) const {
10852   EVT VT = LHS.getValueType();
10853   assert(RHS.getValueType() == VT && "Mismatching operand types");
10854 
10855   SDValue HiLHS;
10856   SDValue HiRHS;
10857   if (Signed) {
10858     // The high part is obtained by SRA'ing all but one of the bits of low
10859     // part.
10860     unsigned LoSize = VT.getFixedSizeInBits();
10861     HiLHS = DAG.getNode(
10862         ISD::SRA, dl, VT, LHS,
10863         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10864     HiRHS = DAG.getNode(
10865         ISD::SRA, dl, VT, RHS,
10866         DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10867   } else {
10868     HiLHS = DAG.getConstant(0, dl, VT);
10869     HiRHS = DAG.getConstant(0, dl, VT);
10870   }
10871   EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10872   forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10873 }
10874 
/// Expand a fixed point multiplication ([US]MULFIX[SAT]) into target
/// independent operations: the double-width product of the operands shifted
/// right by Scale, optionally saturated to the type's range.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  // Number of fractional bits in the fixed point representation.
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Saturating scale-0 signed multiply: use SMULO and clamp on overflow.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Saturating scale-0 unsigned multiply: saturate to UNSIGNED_MAX on
      // overflow.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    // No way to form the wide vector product here; report failure so the
    // caller can legalize differently.
    return SDValue();
  } else {
    // Scalar fallback: brute-force expansion or libcall of the wide multiply.
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits, overflow occurred iff Hi is not the sign
    // extension of Lo.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
11031 
/// Expand a fixed point division ([US]DIVFIX[SAT]) into a plain integer
/// division performed in the same type, by pre-scaling the operands using
/// their known headroom. Returns an empty SDValue if the headroom is
/// insufficient to absorb Scale, in which case the caller must widen.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer shifting the LHS up; shift the RHS down only for the part of the
  // scale the LHS headroom cannot absorb.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    // The quotient is negative exactly when the operand signs differ.
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
11117 
11118 void TargetLowering::expandUADDSUBO(
11119     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11120   SDLoc dl(Node);
11121   SDValue LHS = Node->getOperand(0);
11122   SDValue RHS = Node->getOperand(1);
11123   bool IsAdd = Node->getOpcode() == ISD::UADDO;
11124 
11125   // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11126   unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11127   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11128     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11129     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11130                                     { LHS, RHS, CarryIn });
11131     Result = SDValue(NodeCarry.getNode(), 0);
11132     Overflow = SDValue(NodeCarry.getNode(), 1);
11133     return;
11134   }
11135 
11136   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11137                             LHS.getValueType(), LHS, RHS);
11138 
11139   EVT ResultType = Node->getValueType(1);
11140   EVT SetCCType = getSetCCResultType(
11141       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11142   SDValue SetCC;
11143   if (IsAdd && isOneConstant(RHS)) {
11144     // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11145     // the live range of X. We assume comparing with 0 is cheap.
11146     // The general case (X + C) < C is not necessarily beneficial. Although we
11147     // reduce the live range of X, we may introduce the materialization of
11148     // constant C.
11149     SetCC =
11150         DAG.getSetCC(dl, SetCCType, Result,
11151                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11152   } else if (IsAdd && isAllOnesConstant(RHS)) {
11153     // Special case: uaddo X, -1 overflows if X != 0.
11154     SetCC =
11155         DAG.getSetCC(dl, SetCCType, LHS,
11156                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11157   } else {
11158     ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11159     SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11160   }
11161   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11162 }
11163 
11164 void TargetLowering::expandSADDSUBO(
11165     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11166   SDLoc dl(Node);
11167   SDValue LHS = Node->getOperand(0);
11168   SDValue RHS = Node->getOperand(1);
11169   bool IsAdd = Node->getOpcode() == ISD::SADDO;
11170 
11171   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11172                             LHS.getValueType(), LHS, RHS);
11173 
11174   EVT ResultType = Node->getValueType(1);
11175   EVT OType = getSetCCResultType(
11176       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11177 
11178   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11179   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11180   if (isOperationLegal(OpcSat, LHS.getValueType())) {
11181     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11182     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11183     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11184     return;
11185   }
11186 
11187   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11188 
11189   // For an addition, the result should be less than one of the operands (LHS)
11190   // if and only if the other operand (RHS) is negative, otherwise there will
11191   // be overflow.
11192   // For a subtraction, the result should be less than one of the operands
11193   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11194   // otherwise there will be overflow.
11195   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11196   SDValue ConditionRHS =
11197       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11198 
11199   Overflow = DAG.getBoolExtOrTrunc(
11200       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11201       ResultType, ResultType);
11202 }
11203 
// Expand [SU]MULO (multiply with overflow flag) into a plain multiply plus an
// explicit overflow check. Strategies, tried in order of decreasing quality:
//   1. power-of-two constant RHS -> shift, check via reverse shift;
//   2. MULH[US] legal/custom     -> MUL + high-half node;
//   3. [SU]MUL_LOHI legal/custom -> one node yields both halves;
//   4. double-width type legal   -> extend, wide MUL, split halves;
//   5. scalar fallback           -> forceExpandWideMUL (vectors give up).
// Returns false only when no strategy applies (vector case of 5).
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back (arithmetic for signed, logical for
      // unsigned) does not reproduce the original LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type used by strategy 4 (per-element width doubled; element
  // count preserved for vectors).
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Row 0 = unsigned opcodes, row 1 = signed; columns are
  // {high-half mul, lo/hi pair mul, widening extension}.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Extend both operands, multiply in the wide type, then split the wide
    // product into its low and high VT-sized halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the high half is not the sign-extension of the low
    // half, i.e. TopHalf != (BottomHalf >>s (width-1)).
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit landed in the high half.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11283 
// Expand an unordered VECREDUCE_* node for fixed-width vectors. Power-of-two
// vectors are first narrowed by repeatedly splitting in half and combining
// the halves with the reduction's binary base opcode (while that opcode is
// legal/custom on the half type); whatever vector remains is reduced with a
// linear chain of scalar operations over the extracted elements.
SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  // Binary opcode corresponding to the reduction, e.g. ADD for VECREDUCE_ADD.
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      // Combine the low and high halves elementwise; each step halves the
      // working vector width.
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
      VT = HalfVT;
    }
  }

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  // Fold the remaining elements into a scalar chain.
  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}
11323 
11324 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11325   SDLoc dl(Node);
11326   SDValue AccOp = Node->getOperand(0);
11327   SDValue VecOp = Node->getOperand(1);
11328   SDNodeFlags Flags = Node->getFlags();
11329 
11330   EVT VT = VecOp.getValueType();
11331   EVT EltVT = VT.getVectorElementType();
11332 
11333   if (VT.isScalableVector())
11334     report_fatal_error(
11335         "Expanding reductions for scalable vectors is undefined.");
11336 
11337   unsigned NumElts = VT.getVectorNumElements();
11338 
11339   SmallVector<SDValue, 8> Ops;
11340   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11341 
11342   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11343 
11344   SDValue Res = AccOp;
11345   for (unsigned i = 0; i < NumElts; i++)
11346     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11347 
11348   return Res;
11349 }
11350 
11351 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11352                                SelectionDAG &DAG) const {
11353   EVT VT = Node->getValueType(0);
11354   SDLoc dl(Node);
11355   bool isSigned = Node->getOpcode() == ISD::SREM;
11356   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11357   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11358   SDValue Dividend = Node->getOperand(0);
11359   SDValue Divisor = Node->getOperand(1);
11360   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11361     SDVTList VTs = DAG.getVTList(VT, VT);
11362     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11363     return true;
11364   }
11365   if (isOperationLegalOrCustom(DivOpc, VT)) {
11366     // X % Y -> X-X/Y*Y
11367     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11368     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11369     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11370     return true;
11371   }
11372   return false;
11373 }
11374 
// Expand FP_TO_[SU]INT_SAT (saturating float-to-int). When the saturation
// bounds are exactly representable in the source FP type and FMINNUM/FMAXNUM
// are legal, clamp first and convert once; otherwise convert directly and
// patch the out-of-range cases with compare+select. Either way, NaN maps to
// zero for the signed case (and to MinInt == 0 for the unsigned case).
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Round the integer bounds towards zero into the source FP type; inexact
  // conversion status tells us whether the bounds are exactly representable.
  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    // SETUO (unordered) is true iff Src is NaN.
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
11484 
// Narrow Op from OperandVT to ResultVT using round-to-odd on the magnitude:
// if the ordinary narrowing rounded, force the result's low bit to 1 so that
// a subsequent rounding step cannot suffer from double rounding. Returns the
// adjusted value bit-cast back to ResultVT with the original sign reattached.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  // Same scalar width: nothing to round.
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT WideIntVT = OperandVT.changeTypeToInteger();
  SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
  // Save the sign bit; the round-to-odd computation below works on |Op| and
  // the sign is OR'ed back into the narrow result at the end.
  SDValue SignBit =
      DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
                  DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
  SDValue AbsWide;
  if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
    AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  } else {
    // No FABS: clear the sign bit with integer masking instead.
    SDValue ClearedSign = DAG.getNode(
        ISD::AND, dl, WideIntVT, OpAsInt,
        DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
    AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
  }
  // Narrow |Op| and widen it back so we can compare against the original to
  // detect whether (and in which direction) the narrowing rounded.
  SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
  SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       AbsWide.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true when the values are equal OR either is NaN.
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  // Move the saved sign bit into the narrow type's sign position and OR it
  // back into the magnitude computed above.
  int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
  SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
  SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
  SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
  Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
11552 
// Expand FP_ROUND when the destination scalar type is bf16: round to f32 with
// round-to-odd first (to avoid double rounding from wider types), then do the
// final f32->bf16 rounding via integer bit manipulation, quieting NaNs along
// the way. Returns an empty SDValue for all other destination types.
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // Operand 1 == 1 means the input is known already rounded such that this
    // FP_ROUND introduces no extra rounding step; use the direct conversion.
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    // Detect NaN up front (SETUO: unordered compare of Op with itself).
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
    // Round-to-nearest-even: add 0x7fff plus the lsb of the kept half, so
    // exact ties round towards the value with a zero low bit.
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  // Non-bf16 destinations are not handled here.
  return SDValue();
}
11606 
// Expand VECTOR_SPLICE for scalable vectors by spilling CONCAT(V1, V2) to a
// stack slot and reloading the result vector at the spliced offset. A
// non-negative Imm selects starting at element Imm of V1; a negative Imm
// selects the last -Imm (clamped) elements of V1 followed by elements of V2.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot big enough for both inputs back-to-back.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // sizeof(V1) is vscale-dependent, so materialize it as a VSCALE node.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  // Negative Imm: take the trailing -Imm elements of V1.
  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  // If TrailingElts could exceed the runtime vector length, clamp the byte
  // offset to sizeof(V1) with a UMIN so the load starts no earlier than V1.
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
11680 
11681 SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11682                                               SelectionDAG &DAG) const {
11683   SDLoc DL(Node);
11684   SDValue Vec = Node->getOperand(0);
11685   SDValue Mask = Node->getOperand(1);
11686   SDValue Passthru = Node->getOperand(2);
11687 
11688   EVT VecVT = Vec.getValueType();
11689   EVT ScalarVT = VecVT.getScalarType();
11690   EVT MaskVT = Mask.getValueType();
11691   EVT MaskScalarVT = MaskVT.getScalarType();
11692 
11693   // Needs to be handled by targets that have scalable vector types.
11694   if (VecVT.isScalableVector())
11695     report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11696 
11697   SDValue StackPtr = DAG.CreateStackTemporary(
11698       VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11699   int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11700   MachinePointerInfo PtrInfo =
11701       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11702 
11703   MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11704   SDValue Chain = DAG.getEntryNode();
11705   SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11706 
11707   bool HasPassthru = !Passthru.isUndef();
11708 
11709   // If we have a passthru vector, store it on the stack, overwrite the matching
11710   // positions and then re-write the last element that was potentially
11711   // overwritten even though mask[i] = false.
11712   if (HasPassthru)
11713     Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11714 
11715   SDValue LastWriteVal;
11716   APInt PassthruSplatVal;
11717   bool IsSplatPassthru =
11718       ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11719 
11720   if (IsSplatPassthru) {
11721     // As we do not know which position we wrote to last, we cannot simply
11722     // access that index from the passthru vector. So we first check if passthru
11723     // is a splat vector, to use any element ...
11724     LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11725   } else if (HasPassthru) {
11726     // ... if it is not a splat vector, we need to get the passthru value at
11727     // position = popcount(mask) and re-load it from the stack before it is
11728     // overwritten in the loop below.
11729     EVT PopcountVT = ScalarVT.changeTypeToInteger();
11730     SDValue Popcount = DAG.getNode(
11731         ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11732     Popcount =
11733         DAG.getNode(ISD::ZERO_EXTEND, DL,
11734                     MaskVT.changeVectorElementType(PopcountVT), Popcount);
11735     Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11736     SDValue LastElmtPtr =
11737         getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11738     LastWriteVal = DAG.getLoad(
11739         ScalarVT, DL, Chain, LastElmtPtr,
11740         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11741     Chain = LastWriteVal.getValue(1);
11742   }
11743 
11744   unsigned NumElms = VecVT.getVectorNumElements();
11745   for (unsigned I = 0; I < NumElms; I++) {
11746     SDValue Idx = DAG.getVectorIdxConstant(I, DL);
11747 
11748     SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11749     SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11750     Chain = DAG.getStore(
11751         Chain, DL, ValI, OutPtr,
11752         MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11753 
11754     // Get the mask value and add it to the current output position. This
11755     // either increments by 1 if MaskI is true or adds 0 otherwise.
11756     // Freeze in case we have poison/undef mask entries.
11757     SDValue MaskI = DAG.getFreeze(
11758         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11759     MaskI = DAG.getFreeze(MaskI);
11760     MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11761     MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11762     OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11763 
11764     if (HasPassthru && I == NumElms - 1) {
11765       SDValue EndOfVector =
11766           DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11767       SDValue AllLanesSelected =
11768           DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11769       OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11770       OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11771 
11772       // Re-write the last ValI if all lanes were selected. Otherwise,
11773       // overwrite the last write it with the passthru value.
11774       LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11775                                    LastWriteVal, SDNodeFlags::Unpredictable);
11776       Chain = DAG.getStore(
11777           Chain, DL, LastWriteVal, OutPtr,
11778           MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11779     }
11780   }
11781 
11782   return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11783 }
11784 
// Legalize the condition code of a (possibly VP) SETCC whose CCCode is not
// legal for OpVT. Tries, in order: swapping the operands, inverting the
// condition (setting NeedInvert), inverting and swapping, and finally
// expanding one comparison into two legal comparisons combined with AND/OR.
// Returns true if LHS/RHS/CC were updated (CC may be cleared when the
// expansion already produced the final value in LHS); false if the code was
// already legal. Mask/EVL are both set for VP nodes and both empty otherwise.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try the swapped condition with swapped operands; it is equivalent
    // to the original comparison.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Neither swap nor invert worked; expand into two comparisons (CC1, CC2)
    // combined with Opc (AND or OR).
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      // unordered(L, R) == (L != L) || (R != R) using unordered-NE.
      if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      // Fall back to !ordered(L, R) via the SETO expansion below.
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      // ordered(L, R) == (L == L) && (R == R) using ordered-EQ.
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      // Bit 0x8 of the CondCode encoding distinguishes unordered variants.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
                                          isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // CC1 keeps the relation bits (0x7) and sets the "ordered-only"
        // encoding bit (0x10).
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // Merge the chains of the two (possibly chained/signaling) comparisons.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    // LHS now holds the fully-computed result; signal that to the caller by
    // clearing RHS and CC.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
11936 
11937 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
11938                                                       SelectionDAG &DAG) const {
11939   EVT VT = Node->getValueType(0);
11940   // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
11941   // split into two equal parts.
11942   if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
11943     return SDValue();
11944 
11945   // Restrict expansion to cases where both parts can be concatenated.
11946   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
11947   if (LoVT != HiVT || !isTypeLegal(LoVT))
11948     return SDValue();
11949 
11950   SDLoc DL(Node);
11951   unsigned Opcode = Node->getOpcode();
11952 
11953   // Don't expand if the result is likely to be unrolled anyway.
11954   if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
11955     return SDValue();
11956 
11957   SmallVector<SDValue, 4> LoOps, HiOps;
11958   for (const SDValue &V : Node->op_values()) {
11959     auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
11960     LoOps.push_back(Lo);
11961     HiOps.push_back(Hi);
11962   }
11963 
11964   SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
11965   SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
11966   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
11967 }
11968