//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;

char llvm::GISelKnownBitsAnalysis::ID = 0;

INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
                "Analysis for Computing Known Bits", false, true)

GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelKnownBits::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes.  This means
  // that all lanes in a scalable vector are considered demanded.
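  // For example, a fixed <4 x s32> vector demands elements 0b1111 below, while
  // a scalable <vscale x 4 x s32> vector and a plain s32 scalar both use the
  // single-bit APInt(1, 1).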
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
                                       unsigned Depth) {
  // For now, we only maintain the cache during one request.
  assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");

  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  ComputeKnownBitsCache.clear();
  return Known;
}

bool GISelKnownBits::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelKnownBits::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }

LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One:  0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
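/// (e.g. if Src0 is known to be 0b??00 and Src1 is known to be 0b?0?0, only
/// bit 0 is known zero in the result; a bit stays known only when it is
/// known, with the same value, in both sources).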
void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
                                         KnownBits &Known,
                                         const APInt &DemandedElts,
                                         unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
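// e.g. for BitWidth = 8 with Width known to be in [2, 3], the mask is known
// to be 0b00000?11: bits at or above the maximum width are known zero, bits
// below the minimum width are known one, and the bit in between is unknown.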
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                          const APInt &DemandedElts,
                                          unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  auto CacheEntry = ComputeKnownBitsCache.find(R);
  if (CacheEntry != ComputeKnownBitsCache.end()) {
    Known = CacheEntry->second;
    LLVM_DEBUG(dbgs() << "Cache hit at ");
    LLVM_DEBUG(dumpResult(MI, Known, Depth));
    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
    return;
  }
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelKnownBits object.
  // This may happen when, say, a generic part uses a GISelKnownBits object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr,
  // which creates a new GISelKnownBits object with a different and smaller
  // depth. If we just checked for equality, we would never exit if the depth
  // passed down to the target-specific GISelKnownBits object were already
  // bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
      if (!DemandedElts[i])
        continue;

      computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, APInt(1, 1),
                           Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // Record in the cache that we know nothing for MI.
    // This will get updated later and in the meantime, if we reach that
    // phi again, because of a loop, we will cut the search thanks to this
    // cache entry.
    // We could actually build up more information on the phi by not cutting
    // the search, but that additional information is more a side effect
    // than an intended choice.
    // Therefore, for now, save on compile time until we find a proper way
    // to derive known bits for PHIs within loops.
    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // A PHI's operands are a mix of registers and basic blocks, interleaved;
    // we only care about the register operands.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      // Look through trivial copies and phis, but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`, since known-bits
      // analysis is currently unable to determine the bit width of a
      // register class.
      //
      // We can't use NoSubRegister by name, as it's defined by each target,
      // but it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          MRI.getType(SrcReg).isValid()) {
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sub(Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS is zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // Bits known one in either the LHS or the RHS are known one in the result.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
                         DemandedElts, Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
                         DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
                         DemandedElts, Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
                         DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ASSERT_ZEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned SrcBitWidth;

    // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
    if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
      SrcBitWidth = MI.getOperand(2).getImm();
    else {
      SrcBitWidth = SrcTy.isPointer()
                        ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
                        : SrcTy.getSizeInBits();
    }
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

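    // Scale the demanded elements into source lanes, e.g. when unmerging a
    // <4 x s32> into two <2 x s32> halves, demanding lane 0 of the second half
    // (DstIdx = 1) demands source lanes 0b0100.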
    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
      SubDemandedElts =
          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
    }

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs.  Also, bits known to be zero
    // can't contribute to the population.
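    // e.g. if at most 5 source bits can be set, the count is at most 5, which
    // fits in bit_width(5) = 3 bits, so all higher result bits are zero.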
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set, as provided by Known2.countMinPopulation().
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
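    // e.g. for BitWidth = 32 with Width known to be exactly 8, this computes
    // ashr(shl(Known, 24), 24), replicating the extracted bit 7 into the top
    // 24 bits.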
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the boolean result of the overflow check has its top bits
      // zero, use this info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
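    // e.g. on s32, if bit 27 is known to be one, there are at most 4 leading
    // zeros, so the count fits in bit_width(4) = 3 bits.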
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));

  // Update the cache.
  ComputeKnownBitsCache[R] = Known;
}

/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
                                               const APInt &DemandedElts,
                                               unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
  if (Src1SignBits == 1)
    return 1;
  return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}

/// Compute the known number of sign bits with attached range metadata in the
/// memory operand. If this is an extending load, accounts for the behavior of
/// the high bits.
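/// e.g. a !range of [0, 128) on a load extended from s8 to s32 yields a range
/// whose signed extremes, 0 and 127, have 32 and 25 sign bits respectively,
/// so 25 sign bits are known.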
static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
                                                    unsigned TyBits) {
  const MDNode *Ranges = Ld->getRanges();
  if (!Ranges)
    return 1;

  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
  if (TyBits > CR.getBitWidth()) {
    switch (Ld->getOpcode()) {
    case TargetOpcode::G_SEXTLOAD:
      CR = CR.signExtend(TyBits);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      CR = CR.zeroExtend(TyBits);
      break;
    default:
      break;
    }
  }

  return std::min(CR.getSignedMin().getNumSignBits(),
                  CR.getSignedMax().getNumSignBits());
}

unsigned GISelKnownBits::computeNumSignBits(Register R,
                                            const APInt &DemandedElts,
                                            unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(1).getCImm()->getValue().getNumSignBits();

  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through
  // copies, but it is possible for the initial register being queried to be
  // in this state.
  if (!DstTy.isValid())
    return 1;

  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(1);
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
    return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
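    // e.g. G_SEXT_INREG %x, 8 on s32 guarantees at least 32 - 8 + 1 = 25 sign
    // bits regardless of what is known about %x.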
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcBits = MI.getOperand(2).getImm();
    unsigned InRegBits = TyBits - SrcBits + 1;
    return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
                    InRegBits);
  }
  case TargetOpcode::G_LOAD: {
    GLoad *Ld = cast<GLoad>(&MI);
    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
      break;

    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    GSExtLoad *Ld = cast<GSExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    GZExtLoad *Ld = cast<GZExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits != 1) {
      Register Src2 = MI.getOperand(2).getReg();
      unsigned Src2NumSignBits =
          computeNumSignBits(Src2, DemandedElts, Depth + 1);
      FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);

    // Check if the sign bits of source go down as far as the truncated value.
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
                                 MI.getOperand(3).getReg(), DemandedElts,
                                 Depth + 1);
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
    return computeNumSignBitsMin(MI.getOperand(1).getReg(),
                                 MI.getOperand(2).getReg(), DemandedElts,
                                 Depth + 1);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If compares return 0/-1, all bits are sign bits. We know that we have an
    // integer-based boolean, since these operations are only available for
    // integers.
    if (MI.getOperand(1).getReg() == R) {
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) {      // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) {  // sign bit is 1
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set.  Use CLO to determine
  // the number of identical bits in the top of the input value.
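  // e.g. if the top 12 bits of a 32-bit value are known zero, Mask has 12
  // leading ones after the shift, so the value has at least 12 sign bits.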
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(FirstAnswer, Mask.countl_one());
}

unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeNumSignBits(R, DemandedElts, Depth);
}

void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
  return false;
}

GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
  if (!Info) {
    unsigned MaxDepth =
        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
    Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
  }
  return *Info;
}
889