xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (revision cb14a3fe5122c879eae1fb480ed7ce82a699ddb6)
1 //===- InstCombineCompares.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visitICmp and visitFCmp functions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/APSInt.h"
15 #include "llvm/ADT/ScopeExit.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/CaptureTracking.h"
19 #include "llvm/Analysis/CmpInstAnalysis.h"
20 #include "llvm/Analysis/ConstantFolding.h"
21 #include "llvm/Analysis/InstructionSimplify.h"
22 #include "llvm/Analysis/Utils/Local.h"
23 #include "llvm/Analysis/VectorUtils.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/Support/KnownBits.h"
29 #include "llvm/Transforms/InstCombine/InstCombiner.h"
30 #include <bitset>
31 
32 using namespace llvm;
33 using namespace PatternMatch;
34 
35 #define DEBUG_TYPE "instcombine"
36 
37 // How many times is a select replaced by one of its operands?
38 STATISTIC(NumSel, "Number of select opts");
39 
40 
41 /// Compute Result = In1+In2, returning true if the result overflowed for this
42 /// type.
43 static bool addWithOverflow(APInt &Result, const APInt &In1,
44                             const APInt &In2, bool IsSigned = false) {
45   bool Overflow;
46   if (IsSigned)
47     Result = In1.sadd_ov(In2, Overflow);
48   else
49     Result = In1.uadd_ov(In2, Overflow);
50 
51   return Overflow;
52 }
53 
54 /// Compute Result = In1-In2, returning true if the result overflowed for this
55 /// type.
56 static bool subWithOverflow(APInt &Result, const APInt &In1,
57                             const APInt &In2, bool IsSigned = false) {
58   bool Overflow;
59   if (IsSigned)
60     Result = In1.ssub_ov(In2, Overflow);
61   else
62     Result = In1.usub_ov(In2, Overflow);
63 
64   return Overflow;
65 }
66 
67 /// Given an icmp instruction, return true if any use of this comparison is a
68 /// branch instruction.
69 static bool hasBranchUse(ICmpInst &I) {
70   for (auto *U : I.users())
71     if (isa<BranchInst>(U))
72       return true;
73   return false;
74 }
75 
76 /// Returns true if the exploded icmp can be expressed as a signed comparison
77 /// to zero and updates the predicate accordingly.
78 /// The signedness of the comparison is preserved.
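/// For example, (icmp slt X, 1) can be expressed as (icmp sle X, 0), and
/// (icmp sgt X, -1) as (icmp sge X, 0).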
79 /// TODO: Refactor with decomposeBitTestICmp()?
80 static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
81   if (!ICmpInst::isSigned(Pred))
82     return false;
83 
84   if (C.isZero())
85     return ICmpInst::isRelational(Pred);
86 
87   if (C.isOne()) {
88     if (Pred == ICmpInst::ICMP_SLT) {
89       Pred = ICmpInst::ICMP_SLE;
90       return true;
91     }
92   } else if (C.isAllOnes()) {
93     if (Pred == ICmpInst::ICMP_SGT) {
94       Pred = ICmpInst::ICMP_SGE;
95       return true;
96     }
97   }
98 
99   return false;
100 }
101 
102 /// This is called when we see this pattern:
103 ///   cmp pred (load (gep GV, ...)), cmpcst
104 /// where GV is a global variable with a constant initializer. Try to simplify
105 /// this into some simple computation that does not need the load. For example
106 /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
107 ///
108 /// If AndCst is non-null, then the loaded value is masked with that constant
109 /// before doing the comparison. This handles cases like "A[i]&4 == 0".
110 Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
111     LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
112     ConstantInt *AndCst) {
113   if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
114       GV->getValueType() != GEP->getSourceElementType() ||
115       !GV->isConstant() || !GV->hasDefinitiveInitializer())
116     return nullptr;
117 
118   Constant *Init = GV->getInitializer();
119   if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
120     return nullptr;
121 
122   uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
123   // Don't blow up on huge arrays.
124   if (ArrayElementCount > MaxArraySizeForCombine)
125     return nullptr;
126 
127   // There are many forms of this optimization we can handle; for now, just do
128   // the simple index into a single-dimensional array.
129   //
130   // Require: GEP GV, 0, i {{, constant indices}}
131   if (GEP->getNumOperands() < 3 ||
132       !isa<ConstantInt>(GEP->getOperand(1)) ||
133       !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
134       isa<Constant>(GEP->getOperand(2)))
135     return nullptr;
136 
137   // Check that indices after the variable are constants and in-range for the
138   // type they index.  Collect the indices.  This is typically for arrays of
139   // structs.
140   SmallVector<unsigned, 4> LaterIndices;
141 
142   Type *EltTy = Init->getType()->getArrayElementType();
143   for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
144     ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
145     if (!Idx) return nullptr;  // Variable index.
146 
147     uint64_t IdxVal = Idx->getZExtValue();
148     if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
149 
150     if (StructType *STy = dyn_cast<StructType>(EltTy))
151       EltTy = STy->getElementType(IdxVal);
152     else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
153       if (IdxVal >= ATy->getNumElements()) return nullptr;
154       EltTy = ATy->getElementType();
155     } else {
156       return nullptr; // Unknown type.
157     }
158 
159     LaterIndices.push_back(IdxVal);
160   }
161 
162   enum { Overdefined = -3, Undefined = -2 };
163 
164   // Variables for our state machines.
165 
166   // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
167   // "i == 47 | i == 87", where 47 is the first index the condition is true for,
168   // and 87 is the second (and last) index.  FirstTrueElement is -2 when
169   // undefined, otherwise set to the first true element.  SecondTrueElement is
170   // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
171   int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
172 
173   // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
174   // form "i != 47 & i != 87".  Same state transitions as for true elements.
175   int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
176 
177   // TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
178   // define a state machine that triggers for ranges of values that the index
179   // is true or false for.  This triggers on things like "abbbbc"[i] == 'b'.
180   // This is -2 when undefined, -3 when overdefined, and otherwise the last
181   // index in the range (inclusive).  We use -2 for undefined here because we
182   // use relative comparisons and don't want 0-1 to match -1.
183   int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
184 
185   // MagicBitvector - This is a magic bitvector where we set a bit if the
186   // comparison is true for element 'i'.  If there are 64 or fewer elements in
187   // the array, this will fully represent all the comparison results.
188   uint64_t MagicBitvector = 0;
189 
190   // Scan the array and see if one of our patterns matches.
191   Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
192   for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
193     Constant *Elt = Init->getAggregateElement(i);
194     if (!Elt) return nullptr;
195 
196     // If this is indexing an array of structures, get the structure element.
197     if (!LaterIndices.empty()) {
198       Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
199       if (!Elt)
200         return nullptr;
201     }
202 
203     // If the element is masked, handle it.
204     if (AndCst) {
205       Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
206       if (!Elt)
207         return nullptr;
208     }
209 
210     // Find out if the comparison would be true or false for the i'th element.
211     Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
212                                                   CompareRHS, DL, &TLI);
213     // If the result is undef for this element, ignore it.
214     if (isa<UndefValue>(C)) {
215       // Extend range state machines to cover this element in case there is an
216       // undef in the middle of the range.
217       if (TrueRangeEnd == (int)i-1)
218         TrueRangeEnd = i;
219       if (FalseRangeEnd == (int)i-1)
220         FalseRangeEnd = i;
221       continue;
222     }
223 
224     // If we can't compute the result for any of the elements, we have to give
225     // up evaluating the entire conditional.
226     if (!isa<ConstantInt>(C)) return nullptr;
227 
228     // Otherwise, we know if the comparison is true or false for this element,
229     // update our state machines.
230     bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
231 
232     // State machine for single/double/range index comparison.
233     if (IsTrueForElt) {
234       // Update the TrueElement state machine.
235       if (FirstTrueElement == Undefined)
236         FirstTrueElement = TrueRangeEnd = i;  // First true element.
237       else {
238         // Update double-compare state machine.
239         if (SecondTrueElement == Undefined)
240           SecondTrueElement = i;
241         else
242           SecondTrueElement = Overdefined;
243 
244         // Update range state machine.
245         if (TrueRangeEnd == (int)i-1)
246           TrueRangeEnd = i;
247         else
248           TrueRangeEnd = Overdefined;
249       }
250     } else {
251       // Update the FalseElement state machine.
252       if (FirstFalseElement == Undefined)
253         FirstFalseElement = FalseRangeEnd = i; // First false element.
254       else {
255         // Update double-compare state machine.
256         if (SecondFalseElement == Undefined)
257           SecondFalseElement = i;
258         else
259           SecondFalseElement = Overdefined;
260 
261         // Update range state machine.
262         if (FalseRangeEnd == (int)i-1)
263           FalseRangeEnd = i;
264         else
265           FalseRangeEnd = Overdefined;
266       }
267     }
268 
269     // If this element is in range, update our magic bitvector.
270     if (i < 64 && IsTrueForElt)
271       MagicBitvector |= 1ULL << i;
272 
273     // If all of our states become overdefined, bail out early.  Since the
274     // predicate is expensive, only check it every 8 elements.  This is only
275     // really useful for very large arrays.
276     if ((i & 7) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
277         SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
278         FalseRangeEnd == Overdefined)
279       return nullptr;
280   }
281 
282   // Now that we've scanned the entire array, emit our new comparison(s).  We
283   // order the state machines in complexity of the generated code.
284   Value *Idx = GEP->getOperand(2);
285 
286   // If the index is larger than the pointer offset size of the target, truncate
287   // the index down like the GEP would do implicitly.  We don't have to do this
288   // for an inbounds GEP because the index can't be out of range.
289   if (!GEP->isInBounds()) {
290     Type *PtrIdxTy = DL.getIndexType(GEP->getType());
291     unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth();
292     if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize)
293       Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
294   }
295 
296   // If inbounds keyword is not present, Idx * ElementSize can overflow.
297   // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
298   // Then, there are two possible values for Idx to match offset 0:
299   // 0x00..00, 0x80..00.
300   // Emitting 'icmp eq Idx, 0' isn't correct in this case because the
301   // comparison is false if Idx was 0x80..00.
302   // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
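  // For example (illustrative), if ElementSize is 2, the mask below is
  // (-1 lshr 1), which clears the top bit of Idx.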
303   unsigned ElementSize =
304       DL.getTypeAllocSize(Init->getType()->getArrayElementType());
305   auto MaskIdx = [&](Value *Idx) {
306     if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
307       Value *Mask = ConstantInt::get(Idx->getType(), -1);
308       Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
309       Idx = Builder.CreateAnd(Idx, Mask);
310     }
311     return Idx;
312   };
313 
314   // If the comparison is only true for one or two elements, emit direct
315   // comparisons.
316   if (SecondTrueElement != Overdefined) {
317     Idx = MaskIdx(Idx);
318     // None true -> false.
319     if (FirstTrueElement == Undefined)
320       return replaceInstUsesWith(ICI, Builder.getFalse());
321 
322     Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
323 
324     // True for one element -> 'i == 47'.
325     if (SecondTrueElement == Undefined)
326       return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
327 
328     // True for two elements -> 'i == 47 | i == 72'.
329     Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
330     Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
331     Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
332     return BinaryOperator::CreateOr(C1, C2);
333   }
334 
335   // If the comparison is only false for one or two elements, emit direct
336   // comparisons.
337   if (SecondFalseElement != Overdefined) {
338     Idx = MaskIdx(Idx);
339     // None false -> true.
340     if (FirstFalseElement == Undefined)
341       return replaceInstUsesWith(ICI, Builder.getTrue());
342 
343     Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
344 
345     // False for one element -> 'i != 47'.
346     if (SecondFalseElement == Undefined)
347       return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
348 
349     // False for two elements -> 'i != 47 & i != 72'.
350     Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
351     Value *SecondFalseIdx = ConstantInt::get(Idx->getType(), SecondFalseElement);
352     Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
353     return BinaryOperator::CreateAnd(C1, C2);
354   }
355 
356   // If the comparison can be replaced with a range comparison for the elements
357   // where it is true, emit the range check.
358   if (TrueRangeEnd != Overdefined) {
359     assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
360     Idx = MaskIdx(Idx);
361 
362     // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
363     if (FirstTrueElement) {
364       Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
365       Idx = Builder.CreateAdd(Idx, Offs);
366     }
367 
368     Value *End = ConstantInt::get(Idx->getType(),
369                                   TrueRangeEnd-FirstTrueElement+1);
370     return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
371   }
372 
373   // False range check.
374   if (FalseRangeEnd != Overdefined) {
375     assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
376     Idx = MaskIdx(Idx);
377     // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
378     if (FirstFalseElement) {
379       Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
380       Idx = Builder.CreateAdd(Idx, Offs);
381     }
382 
383     Value *End = ConstantInt::get(Idx->getType(),
384                                   FalseRangeEnd-FirstFalseElement);
385     return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
386   }
387 
388   // If a magic bitvector captures the entire comparison state
389   // of this load, replace it with computation that does:
390   //   ((magic_cst >> i) & 1) != 0
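  // For example (illustrative), if only element 1 of a four-element array
  // compares true, magic_cst is 0b0010, and the icmp becomes
  // ((2 >> i) & 1) != 0.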
391   {
392     Type *Ty = nullptr;
393 
394     // Look for an appropriate type:
395     // - The type of Idx if the magic fits
396     // - The smallest fitting legal type
397     if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
398       Ty = Idx->getType();
399     else
400       Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
401 
402     if (Ty) {
403       Idx = MaskIdx(Idx);
404       Value *V = Builder.CreateIntCast(Idx, Ty, false);
405       V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
406       V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
407       return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
408     }
409   }
410 
411   return nullptr;
412 }
413 
414 /// Returns true if we can rewrite Start as a GEP with pointer Base
415 /// and some integer offset. The nodes that need to be re-written
416 /// for this transformation will be added to Explored.
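/// For example (illustrative), with Base = %base and Start = %p2 in:
///   %p1 = getelementptr inbounds i8, ptr %base, i64 4
///   %p2 = getelementptr inbounds i8, ptr %p1, i64 %i
/// Start can be rewritten as %base plus the integer offset (4 + %i), and both
/// GEPs are added to Explored.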
417 static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
418                                   const DataLayout &DL,
419                                   SetVector<Value *> &Explored) {
420   SmallVector<Value *, 16> WorkList(1, Start);
421   Explored.insert(Base);
422 
423   // The following traversal gives us an order which can be used
424   // when doing the final transformation. Since in the final
425   // transformation we create the PHI replacement instructions first,
426   // we don't have to get them in any particular order.
427   //
428   // However, for other instructions we will have to traverse the
429   // operands of an instruction first, which means that we have to
430   // do a post-order traversal.
431   while (!WorkList.empty()) {
432     SetVector<PHINode *> PHIs;
433 
434     while (!WorkList.empty()) {
435       if (Explored.size() >= 100)
436         return false;
437 
438       Value *V = WorkList.back();
439 
440       if (Explored.contains(V)) {
441         WorkList.pop_back();
442         continue;
443       }
444 
445       if (!isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
446         // We've found some value that we can't explore which is different from
447         // the base. Therefore we can't do this transformation.
448         return false;
449 
450       if (auto *GEP = dyn_cast<GEPOperator>(V)) {
451         // Only allow inbounds GEPs with at most one variable offset.
452         auto IsNonConst = [](Value *V) { return !isa<ConstantInt>(V); };
453         if (!GEP->isInBounds() || count_if(GEP->indices(), IsNonConst) > 1)
454           return false;
455 
456         if (!Explored.contains(GEP->getOperand(0)))
457           WorkList.push_back(GEP->getOperand(0));
458       }
459 
460       if (WorkList.back() == V) {
461         WorkList.pop_back();
462         // We've finished visiting this node; mark it as such.
463         Explored.insert(V);
464       }
465 
466       if (auto *PN = dyn_cast<PHINode>(V)) {
467         // We cannot transform PHIs on unsplittable basic blocks.
468         if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
469           return false;
470         Explored.insert(PN);
471         PHIs.insert(PN);
472       }
473     }
474 
475     // Explore the PHI nodes further.
476     for (auto *PN : PHIs)
477       for (Value *Op : PN->incoming_values())
478         if (!Explored.contains(Op))
479           WorkList.push_back(Op);
480   }
481 
482   // Make sure that we can do this. Since we can't insert GEPs in a basic
483   // block before a PHI node, we can't easily do this transformation if
484   // we have PHI node users of transformed instructions.
485   for (Value *Val : Explored) {
486     for (User *U : Val->users()) {
487 
488       auto *PHI = dyn_cast<PHINode>(U);
489       auto *Inst = dyn_cast<Instruction>(Val);
490 
491       if (Inst == Base || Inst == PHI || !Inst || !PHI ||
492           !Explored.contains(PHI))
493         continue;
494 
495       if (PHI->getParent() == Inst->getParent())
496         return false;
497     }
498   }
499   return true;
500 }
501 
502 // Sets the appropriate insert point on Builder where we can add
503 // a replacement Instruction for V (if that is possible).
504 static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
505                               bool Before = true) {
506   if (auto *PHI = dyn_cast<PHINode>(V)) {
507     BasicBlock *Parent = PHI->getParent();
508     Builder.SetInsertPoint(Parent, Parent->getFirstInsertionPt());
509     return;
510   }
511   if (auto *I = dyn_cast<Instruction>(V)) {
512     if (!Before)
513       I = &*std::next(I->getIterator());
514     Builder.SetInsertPoint(I);
515     return;
516   }
517   if (auto *A = dyn_cast<Argument>(V)) {
518     // Set the insertion point in the entry block.
519     BasicBlock &Entry = A->getParent()->getEntryBlock();
520     Builder.SetInsertPoint(&Entry, Entry.getFirstInsertionPt());
521     return;
522   }
523   // Otherwise, this is a constant and we don't need to set a new
524   // insertion point.
525   assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
526 }
527 
528 /// Returns a re-written value of Start as an indexed GEP using Base as a
529 /// pointer.
530 static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
531                                  const DataLayout &DL,
532                                  SetVector<Value *> &Explored,
533                                  InstCombiner &IC) {
534   // Perform all the substitutions. This is a bit tricky because we can
535   // have cycles in our use-def chains.
536   // 1. Create the PHI nodes without any incoming values.
537   // 2. Create all the other values.
538   // 3. Add the edges for the PHI nodes.
539   // 4. Emit GEPs to get the original pointers.
540   // 5. Remove the original instructions.
541   Type *IndexType = IntegerType::get(
542       Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType()));
543 
544   DenseMap<Value *, Value *> NewInsts;
545   NewInsts[Base] = ConstantInt::getNullValue(IndexType);
546 
547   // Create the new PHI nodes, without adding any incoming values.
548   for (Value *Val : Explored) {
549     if (Val == Base)
550       continue;
551     // Create empty phi nodes. This avoids cyclic dependencies when creating
552     // the remaining instructions.
553     if (auto *PHI = dyn_cast<PHINode>(Val))
554       NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
555                                       PHI->getName() + ".idx", PHI);
556   }
557   IRBuilder<> Builder(Base->getContext());
558 
559   // Create all the other instructions.
560   for (Value *Val : Explored) {
561     if (NewInsts.contains(Val))
562       continue;
563 
564     if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
565       setInsertionPoint(Builder, GEP);
566       Value *Op = NewInsts[GEP->getOperand(0)];
567       Value *OffsetV = emitGEPOffset(&Builder, DL, GEP);
568       if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
569         NewInsts[GEP] = OffsetV;
570       else
571         NewInsts[GEP] = Builder.CreateNSWAdd(
572             Op, OffsetV, GEP->getOperand(0)->getName() + ".add");
573       continue;
574     }
575     if (isa<PHINode>(Val))
576       continue;
577 
578     llvm_unreachable("Unexpected instruction type");
579   }
580 
581   // Add the incoming values to the PHI nodes.
582   for (Value *Val : Explored) {
583     if (Val == Base)
584       continue;
585     // All the instructions have been created, we can now add edges to the
586     // phi nodes.
587     if (auto *PHI = dyn_cast<PHINode>(Val)) {
588       PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
589       for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
590         Value *NewIncoming = PHI->getIncomingValue(I);
591 
592         if (NewInsts.contains(NewIncoming))
593           NewIncoming = NewInsts[NewIncoming];
594 
595         NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
596       }
597     }
598   }
599 
600   for (Value *Val : Explored) {
601     if (Val == Base)
602       continue;
603 
604     setInsertionPoint(Builder, Val, false);
605     // Create GEP for external users.
606     Value *NewVal = Builder.CreateInBoundsGEP(
607         Builder.getInt8Ty(), Base, NewInsts[Val], Val->getName() + ".ptr");
608     IC.replaceInstUsesWith(*cast<Instruction>(Val), NewVal);
609     // Add old instruction to worklist for DCE. We don't directly remove it
610     // here because the original compare is one of the users.
611     IC.addToWorklist(cast<Instruction>(Val));
612   }
613 
614   return NewInsts[Start];
615 }
616 
617 /// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
618 /// We can look through PHIs, GEPs and casts in order to determine a common base
619 /// between GEPLHS and RHS.
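/// For example (illustrative), if GEPLHS is (gep inbounds i8, ptr %base, i64 8)
/// and RHS is a PHI of inbounds GEPs of %base, the pointer comparison can be
/// rewritten as a comparison of the constant offset 8 against RHS's accumulated
/// integer offset.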
620 static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
621                                               ICmpInst::Predicate Cond,
622                                               const DataLayout &DL,
623                                               InstCombiner &IC) {
624   // FIXME: Support vector of pointers.
625   if (GEPLHS->getType()->isVectorTy())
626     return nullptr;
627 
628   if (!GEPLHS->hasAllConstantIndices())
629     return nullptr;
630 
631   APInt Offset(DL.getIndexTypeSizeInBits(GEPLHS->getType()), 0);
632   Value *PtrBase =
633       GEPLHS->stripAndAccumulateConstantOffsets(DL, Offset,
634                                                 /*AllowNonInbounds*/ false);
635 
636   // Bail if we looked through addrspacecast.
637   if (PtrBase->getType() != GEPLHS->getType())
638     return nullptr;
639 
640   // The set of nodes that will take part in this transformation.
641   SetVector<Value *> Nodes;
642 
643   if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
644     return nullptr;
645 
646   // We know we can re-write this as
647   //   ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2))
648   // Since we've only looked through inbounds GEPs we know that we
649   // can't have overflow on either side. We can therefore re-write
650   // this as:
651   //   OFFSET1 cmp OFFSET2
652   Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes, IC);
653 
654   // rewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
655   // GEP having PtrBase as the pointer base, and has returned in NewRHS the
656   // offset. Since Offset is the offset of GEPLHS from the base pointer, we now
657   // compare the offsets instead of comparing the pointers.
658   return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
659                       IC.Builder.getInt(Offset), NewRHS);
660 }
661 
662 /// Fold comparisons between a GEP instruction and something else. At this point
663 /// we know that the GEP is on the LHS of the comparison.
664 Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
665                                            ICmpInst::Predicate Cond,
666                                            Instruction &I) {
667   // Don't transform signed compares of GEPs into index compares. Even if the
668   // GEP is inbounds, the final add of the base pointer can have signed overflow
669   // and would change the result of the icmp.
670   // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
671   // the maximum signed value for the pointer type.
672   if (ICmpInst::isSigned(Cond))
673     return nullptr;
674 
675   // Look through bitcasts and addrspacecasts. We do not however want to remove
676   // 0 GEPs.
677   if (!isa<GetElementPtrInst>(RHS))
678     RHS = RHS->stripPointerCasts();
679 
680   Value *PtrBase = GEPLHS->getOperand(0);
681   if (PtrBase == RHS && (GEPLHS->isInBounds() || ICmpInst::isEquality(Cond))) {
682     // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
683     Value *Offset = EmitGEPOffset(GEPLHS);
684     return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
685                         Constant::getNullValue(Offset->getType()));
686   }
687 
688   if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) &&
689       isa<Constant>(RHS) && cast<Constant>(RHS)->isNullValue() &&
690       !NullPointerIsDefined(I.getFunction(),
691                             RHS->getType()->getPointerAddressSpace())) {
692     // For most address spaces, an allocation can't be placed at null, but null
693     // itself is treated as a zero-size allocation by the inbounds rules.  Thus,
694     // the only valid inbounds address derived from null, is null itself.
695     // Thus, we have four cases to consider:
696     // 1) Base == nullptr, Offset == 0 -> inbounds, null
697     // 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds
698     // 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations)
699     // 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison)
700     //
701     // (Note if we're indexing a type of size 0, that simply collapses into one
702     //  of the buckets above.)
703     //
704     // In general, we're allowed to make values less poison (i.e. remove
705     //   sources of full UB), so in this case, we just select between the two
706     //   non-poison cases (1 and 4 above).
707     //
708     // For vectors, we apply the same reasoning on a per-lane basis.
709     auto *Base = GEPLHS->getPointerOperand();
710     if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) {
711       auto EC = cast<VectorType>(GEPLHS->getType())->getElementCount();
712       Base = Builder.CreateVectorSplat(EC, Base);
713     }
714     return new ICmpInst(Cond, Base,
715                         ConstantExpr::getPointerBitCastOrAddrSpaceCast(
716                             cast<Constant>(RHS), Base->getType()));
717   } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
718     // If the base pointers are different, but the indices are the same, just
719     // compare the base pointer.
720     if (PtrBase != GEPRHS->getOperand(0)) {
721       bool IndicesTheSame =
722           GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
723           GEPLHS->getPointerOperand()->getType() ==
724               GEPRHS->getPointerOperand()->getType() &&
725           GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType();
726       if (IndicesTheSame)
727         for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
728           if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
729             IndicesTheSame = false;
730             break;
731           }
732 
733       // If all indices are the same, just compare the base pointers.
734       Type *BaseType = GEPLHS->getOperand(0)->getType();
735       if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
736         return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
737 
738       // If we're comparing GEPs with two base pointers that only differ in type
739       // and both GEPs have only constant indices or just one use, then fold
740       // the compare with the adjusted indices.
741       // FIXME: Support vector of pointers.
742       if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
743           (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
744           (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
745           PtrBase->stripPointerCasts() ==
746               GEPRHS->getOperand(0)->stripPointerCasts() &&
747           !GEPLHS->getType()->isVectorTy()) {
748         Value *LOffset = EmitGEPOffset(GEPLHS);
749         Value *ROffset = EmitGEPOffset(GEPRHS);
750 
751         // If we looked through an addrspacecast between different sized address
752         // spaces, the LHS and RHS pointers are different sized
753         // integers. Truncate to the smaller one.
754         Type *LHSIndexTy = LOffset->getType();
755         Type *RHSIndexTy = ROffset->getType();
756         if (LHSIndexTy != RHSIndexTy) {
757           if (LHSIndexTy->getPrimitiveSizeInBits().getFixedValue() <
758               RHSIndexTy->getPrimitiveSizeInBits().getFixedValue()) {
759             ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
760           } else
761             LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
762         }
763 
764         Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
765                                         LOffset, ROffset);
766         return replaceInstUsesWith(I, Cmp);
767       }
768 
769       // Otherwise, the base pointers are different and the indices are
770       // different. Try convert this to an indexed compare by looking through
771       // PHIs/casts.
772       return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
773     }
774 
775     bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
776     if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
777         GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType()) {
778       // If the GEPs only differ by one index, compare it.
779       unsigned NumDifferences = 0;  // Keep track of # differences.
780       unsigned DiffOperand = 0;     // The operand that differs.
781       for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
782         if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
783           Type *LHSType = GEPLHS->getOperand(i)->getType();
784           Type *RHSType = GEPRHS->getOperand(i)->getType();
785           // FIXME: Better support for vector of pointers.
786           if (LHSType->getPrimitiveSizeInBits() !=
787                    RHSType->getPrimitiveSizeInBits() ||
788               (GEPLHS->getType()->isVectorTy() &&
789                (!LHSType->isVectorTy() || !RHSType->isVectorTy()))) {
790             // Irreconcilable differences.
791             NumDifferences = 2;
792             break;
793           }
794 
795           if (NumDifferences++) break;
796           DiffOperand = i;
797         }
798 
799       if (NumDifferences == 0)   // SAME GEP?
800         return replaceInstUsesWith(I, // No comparison is needed here.
801           ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));
802 
803       else if (NumDifferences == 1 && GEPsInBounds) {
804         Value *LHSV = GEPLHS->getOperand(DiffOperand);
805         Value *RHSV = GEPRHS->getOperand(DiffOperand);
806         // Make sure we do a signed comparison here.
807         return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
808       }
809     }
810 
811     // Only lower this if the icmp is the only user of the GEP or if we expect
812     // the result to fold to a constant!
813     if ((GEPsInBounds || CmpInst::isEquality(Cond)) &&
814         (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
815         (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse())) {
816       // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
817       Value *L = EmitGEPOffset(GEPLHS);
818       Value *R = EmitGEPOffset(GEPRHS);
819       return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
820     }
821   }
822 
823   // Try convert this to an indexed compare by looking through PHIs/casts as a
824   // last resort.
825   return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
826 }
827 
828 bool InstCombinerImpl::foldAllocaCmp(AllocaInst *Alloca) {
829   // It would be tempting to fold away comparisons between allocas and any
830   // pointer not based on that alloca (e.g. an argument). However, even
831   // though such pointers cannot alias, they can still compare equal.
832   //
833   // But LLVM doesn't specify where allocas get their memory, so if the alloca
834   // doesn't escape we can argue that it's impossible to guess its value, and we
835   // can therefore act as if any such guesses are wrong.
836   //
837   // However, we need to ensure that this folding is consistent: We can't fold
838   // one comparison to false, and then leave a different comparison against the
839   // same value alone (as it might evaluate to true at runtime, leading to a
840   // contradiction). As such, this code ensures that all comparisons are folded
841   // at the same time, and there are no other escapes.
842 
843   struct CmpCaptureTracker : public CaptureTracker {
844     AllocaInst *Alloca;
845     bool Captured = false;
846     /// The value of the map is a bit mask of which icmp operands the alloca is
847     /// used in.
848     SmallMapVector<ICmpInst *, unsigned, 4> ICmps;
849 
850     CmpCaptureTracker(AllocaInst *Alloca) : Alloca(Alloca) {}
851 
852     void tooManyUses() override { Captured = true; }
853 
854     bool captured(const Use *U) override {
855       auto *ICmp = dyn_cast<ICmpInst>(U->getUser());
856       // We need to check that U is based *only* on the alloca, and doesn't
857       // have other contributions from a select/phi operand.
858       // TODO: We could check whether getUnderlyingObjects() reduces to one
859       // object, which would allow looking through phi nodes.
860       if (ICmp && ICmp->isEquality() && getUnderlyingObject(*U) == Alloca) {
861         // Collect equality icmps of the alloca, and don't treat them as
862         // captures.
863         auto Res = ICmps.insert({ICmp, 0});
864         Res.first->second |= 1u << U->getOperandNo();
865         return false;
866       }
867 
868       Captured = true;
869       return true;
870     }
871   };
872 
873   CmpCaptureTracker Tracker(Alloca);
874   PointerMayBeCaptured(Alloca, &Tracker);
875   if (Tracker.Captured)
876     return false;
877 
878   bool Changed = false;
879   for (auto [ICmp, Operands] : Tracker.ICmps) {
880     switch (Operands) {
881     case 1:
882     case 2: {
883       // The alloca is only used in one icmp operand. Assume that the
884       // equality is false.
885       auto *Res = ConstantInt::get(
886           ICmp->getType(), ICmp->getPredicate() == ICmpInst::ICMP_NE);
887       replaceInstUsesWith(*ICmp, Res);
888       eraseInstFromFunction(*ICmp);
889       Changed = true;
890       break;
891     }
892     case 3:
893       // Both icmp operands are based on the alloca, so this is comparing
894       // pointer offsets, without leaking any information about the address
895       // of the alloca. Ignore such comparisons.
896       break;
897     default:
898       llvm_unreachable("Cannot happen");
899     }
900   }
901 
902   return Changed;
903 }
904 
905 /// Fold "icmp pred (X+C), X".
906 Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C,
907                                                   ICmpInst::Predicate Pred) {
908   // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
909   // so the values can never be equal.  Similarly for all other "or equals"
910   // operators.
911   assert(!!C && "C should not be zero!");
912 
913   // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
914   // (X+2) <u X        --> X >u (MAXUINT-2)        --> X >u 253
915   // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
916   if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
917     Constant *R = ConstantInt::get(X->getType(),
918                                    APInt::getMaxValue(C.getBitWidth()) - C);
919     return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
920   }
921 
922   // (X+1) >u X        --> X <u (0-1)        --> X != 255
923   // (X+2) >u X        --> X <u (0-2)        --> X <u 254
924   // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
925   if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
926     return new ICmpInst(ICmpInst::ICMP_ULT, X,
927                         ConstantInt::get(X->getType(), -C));
928 
929   APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());
930 
931   // (X+ 1) <s X       --> X >s (MAXSINT-1)          --> X == 127
932   // (X+ 2) <s X       --> X >s (MAXSINT-2)          --> X >s 125
933   // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)    --> X >s 0
934   // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)    --> X >s -1
935   // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
936   // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
937   if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
938     return new ICmpInst(ICmpInst::ICMP_SGT, X,
939                         ConstantInt::get(X->getType(), SMax - C));
940 
941   // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
942   // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
943   // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
944   // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
945   // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
946   // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128
947 
948   assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
949   return new ICmpInst(ICmpInst::ICMP_SLT, X,
950                       ConstantInt::get(X->getType(), SMax - (C - 1)));
951 }
952 
953 /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
954 /// (icmp eq/ne A, Log2(AP2/AP1)) ->
955 /// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
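/// For example, (icmp eq (lshr i32 64, %A), 16) becomes (icmp eq %A, 2),
/// since 64 >> 2 == 16.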
956 Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
957                                                      const APInt &AP1,
958                                                      const APInt &AP2) {
959   assert(I.isEquality() && "Cannot fold icmp gt/lt");
960 
961   auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
962     if (I.getPredicate() == I.ICMP_NE)
963       Pred = CmpInst::getInversePredicate(Pred);
964     return new ICmpInst(Pred, LHS, RHS);
965   };
966 
967   // Don't bother doing any work for cases which InstSimplify handles.
968   if (AP2.isZero())
969     return nullptr;
970 
971   bool IsAShr = isa<AShrOperator>(I.getOperand(0));
972   if (IsAShr) {
973     if (AP2.isAllOnes())
974       return nullptr;
975     if (AP2.isNegative() != AP1.isNegative())
976       return nullptr;
977     if (AP2.sgt(AP1))
978       return nullptr;
979   }
980 
981   if (!AP1)
982     // 'A' must be large enough to shift out the highest set bit.
983     return getICmp(I.ICMP_UGT, A,
984                    ConstantInt::get(A->getType(), AP2.logBase2()));
985 
986   if (AP1 == AP2)
987     return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
988 
989   int Shift;
990   if (IsAShr && AP1.isNegative())
991     Shift = AP1.countl_one() - AP2.countl_one();
992   else
993     Shift = AP1.countl_zero() - AP2.countl_zero();
994 
995   if (Shift > 0) {
996     if (IsAShr && AP1 == AP2.ashr(Shift)) {
997       // There are multiple solutions if we are comparing against -1 and the LHS
998       // of the ashr is not a power of two.
999       if (AP1.isAllOnes() && !AP2.isPowerOf2())
1000         return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
1001       return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1002     } else if (AP1 == AP2.lshr(Shift)) {
1003       return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1004     }
1005   }
1006 
1007   // Shifting const2 will never be equal to const1.
1008   // FIXME: This should always be handled by InstSimplify?
1009   auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
1010   return replaceInstUsesWith(I, TorF);
1011 }
1012 
1013 /// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
1014 /// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
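/// For example, (icmp eq (shl i32 3, %A), 24) becomes (icmp eq %A, 3),
/// since 3 << 3 == 24.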
1015 Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
1016                                                      const APInt &AP1,
1017                                                      const APInt &AP2) {
1018   assert(I.isEquality() && "Cannot fold icmp gt/lt");
1019 
1020   auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
1021     if (I.getPredicate() == I.ICMP_NE)
1022       Pred = CmpInst::getInversePredicate(Pred);
1023     return new ICmpInst(Pred, LHS, RHS);
1024   };
1025 
1026   // Don't bother doing any work for cases which InstSimplify handles.
1027   if (AP2.isZero())
1028     return nullptr;
1029 
1030   unsigned AP2TrailingZeros = AP2.countr_zero();
1031 
1032   if (!AP1 && AP2TrailingZeros != 0)
1033     return getICmp(
1034         I.ICMP_UGE, A,
1035         ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
1036 
1037   if (AP1 == AP2)
1038     return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
1039 
1040   // Get the distance between the lowest bits that are set.
1041   int Shift = AP1.countr_zero() - AP2TrailingZeros;
1042 
1043   if (Shift > 0 && AP2.shl(Shift) == AP1)
1044     return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1045 
1046   // Shifting const2 will never be equal to const1.
1047   // FIXME: This should always be handled by InstSimplify?
1048   auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
1049   return replaceInstUsesWith(I, TorF);
1050 }
1051 
1052 /// The caller has matched a pattern of the form:
1053 ///   I = icmp ugt (add (add A, B), CI2), CI1
1054 /// If this is of the form:
1055 ///   sum = a + b
1056 ///   if (sum+128 >u 255)
1057 /// Then replace it with llvm.sadd.with.overflow.i8.
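/// (Illustratively, for i8 operands sign-extended into a wider add, the
/// narrowed @llvm.sadd.with.overflow.i8 call emitted below produces both the
/// i8 sum and the overflow bit that replaces the original icmp.)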
1058 ///
1059 static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
1060                                           ConstantInt *CI2, ConstantInt *CI1,
1061                                           InstCombinerImpl &IC) {
1062   // The transformation we're trying to do here is to transform this into an
1063   // llvm.sadd.with.overflow.  To do this, we have to replace the original add
1064   // with a narrower add, and discard the add-with-constant that is part of the
1065   // range check (if we can't eliminate it, this isn't profitable).
1066 
1067   // In order to eliminate the add-with-constant, the compare can be its only
1068   // use.
1069   Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
1070   if (!AddWithCst->hasOneUse())
1071     return nullptr;
1072 
1073   // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
1074   if (!CI2->getValue().isPowerOf2())
1075     return nullptr;
1076   unsigned NewWidth = CI2->getValue().countr_zero();
1077   if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
1078     return nullptr;
1079 
1080   // The width of the new add formed is 1 more than the bias.
1081   ++NewWidth;
1082 
1083   // Check to see that CI1 is an all-ones value with NewWidth bits.
1084   if (CI1->getBitWidth() == NewWidth ||
1085       CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
1086     return nullptr;
1087 
1088   // This is only really a signed overflow check if the inputs have been
1089   // sign-extended; check for that condition. For example, if CI2 is 2^31 and
1090   // the operands of the add are 64 bits wide, we need at least 33 sign bits.
1091   if (IC.ComputeMaxSignificantBits(A, 0, &I) > NewWidth ||
1092       IC.ComputeMaxSignificantBits(B, 0, &I) > NewWidth)
1093     return nullptr;
1094 
1095   // In order to replace the original add with a narrower
1096   // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
1097   // and truncates that discard the high bits of the add.  Verify that this is
1098   // the case.
1099   Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
1100   for (User *U : OrigAdd->users()) {
1101     if (U == AddWithCst)
1102       continue;
1103 
1104     // Only accept truncates for now.  We would really like a nice recursive
1105     // predicate like SimplifyDemandedBits, but one which goes down the use-def
1106     // chain to see which bits of a value are actually demanded.  If the
1107     // original add had another add which was then immediately truncated, we
1108     // could still do the transformation.
1109     TruncInst *TI = dyn_cast<TruncInst>(U);
1110     if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
1111       return nullptr;
1112   }
1113 
1114   // If the pattern matches, truncate the inputs to the narrower type and
1115   // use the sadd_with_overflow intrinsic to efficiently compute both the
1116   // result and the overflow bit.
1117   Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
1118   Function *F = Intrinsic::getDeclaration(
1119       I.getModule(), Intrinsic::sadd_with_overflow, NewType);
1120 
1121   InstCombiner::BuilderTy &Builder = IC.Builder;
1122 
1123   // Put the new code above the original add, in case there are any uses of the
1124   // add between the add and the compare.
1125   Builder.SetInsertPoint(OrigAdd);
1126 
1127   Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
1128   Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
1129   CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
1130   Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
1131   Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());
1132 
1133   // The inner add was the result of the narrow add, zero extended to the
1134   // wider type.  Replace it with the result computed by the intrinsic.
1135   IC.replaceInstUsesWith(*OrigAdd, ZExt);
1136   IC.eraseInstFromFunction(*OrigAdd);
1137 
1138   // The original icmp gets replaced with the overflow value.
1139   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
1140 }
1141 
1142 /// If we have:
1143 ///   icmp eq/ne (urem/srem %x, %y), 0
1144 /// iff %y is a power-of-two, we can replace this with a bit test:
1145 ///   icmp eq/ne (and %x, (add %y, -1)), 0
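/// For example, (icmp eq (urem i32 %x, 8), 0) becomes
/// (icmp eq (and i32 %x, 7), 0).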
1146 Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
1147   // This fold is only valid for equality predicates.
1148   if (!I.isEquality())
1149     return nullptr;
1150   ICmpInst::Predicate Pred;
1151   Value *X, *Y, *Zero;
1152   if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))),
1153                         m_CombineAnd(m_Zero(), m_Value(Zero)))))
1154     return nullptr;
1155   if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I))
1156     return nullptr;
1157   // This may increase instruction count; we don't enforce that Y is a constant.
1158   Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType()));
1159   Value *Masked = Builder.CreateAnd(X, Mask);
1160   return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero);
1161 }
1162 
1163 /// Fold equality-comparison between zero and any (maybe truncated) right-shift
1164 /// by one-less-than-bitwidth into a sign test on the original value.
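/// For example, (icmp eq (lshr i32 %x, 31), 0) becomes (icmp sge i32 %x, 0).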
1165 Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) {
1166   Instruction *Val;
1167   ICmpInst::Predicate Pred;
1168   if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero())))
1169     return nullptr;
1170 
1171   Value *X;
1172   Type *XTy;
1173 
1174   Constant *C;
1175   if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) {
1176     XTy = X->getType();
1177     unsigned XBitWidth = XTy->getScalarSizeInBits();
1178     if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
1179                                      APInt(XBitWidth, XBitWidth - 1))))
1180       return nullptr;
1181   } else if (isa<BinaryOperator>(Val) &&
1182              (X = reassociateShiftAmtsOfTwoSameDirectionShifts(
1183                   cast<BinaryOperator>(Val), SQ.getWithInstruction(Val),
1184                   /*AnalyzeForSignBitExtraction=*/true))) {
1185     XTy = X->getType();
1186   } else
1187     return nullptr;
1188 
1189   return ICmpInst::Create(Instruction::ICmp,
1190                           Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE
1191                                                     : ICmpInst::ICMP_SLT,
1192                           X, ConstantInt::getNullValue(XTy));
1193 }
1194 
1195 // Handle  icmp pred X, 0
1196 Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
1197   CmpInst::Predicate Pred = Cmp.getPredicate();
1198   if (!match(Cmp.getOperand(1), m_Zero()))
1199     return nullptr;
1200 
1201   // (icmp sgt smin(PosA, B), 0) -> (icmp sgt B, 0)
1202   if (Pred == ICmpInst::ICMP_SGT) {
1203     Value *A, *B;
1204     if (match(Cmp.getOperand(0), m_SMin(m_Value(A), m_Value(B)))) {
1205       if (isKnownPositive(A, SQ.getWithInstruction(&Cmp)))
1206         return new ICmpInst(Pred, B, Cmp.getOperand(1));
1207       if (isKnownPositive(B, SQ.getWithInstruction(&Cmp)))
1208         return new ICmpInst(Pred, A, Cmp.getOperand(1));
1209     }
1210   }
1211 
1212   if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp))
1213     return New;
1214 
1215   // Given:
1216   //   icmp eq/ne (urem %x, %y), 0
1217   // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
1218   //   icmp eq/ne %x, 0
1219   Value *X, *Y;
1220   if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
1221       ICmpInst::isEquality(Pred)) {
1222     KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
1223     KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
1224     if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
1225       return new ICmpInst(Pred, X, Cmp.getOperand(1));
1226   }
1227 
1228   // (icmp eq/ne (mul X, Y), 0) -> (icmp eq/ne X, 0) / (icmp eq/ne Y, 0): drop
1229   // an operand that is known odd, or known non-zero when the mul can't wrap.
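  // For example, if %x is known odd, (icmp eq (mul i32 %x, %y), 0) becomes
  // (icmp eq i32 %y, 0).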
1230   if (match(Cmp.getOperand(0), m_Mul(m_Value(X), m_Value(Y))) &&
1231       ICmpInst::isEquality(Pred)) {
1232 
1233     KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
1234     // if X % 2 != 0
1235     //    (icmp eq/ne Y)
1236     if (XKnown.countMaxTrailingZeros() == 0)
1237       return new ICmpInst(Pred, Y, Cmp.getOperand(1));
1238 
1239     KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
1240     // if Y % 2 != 0
1241     //    (icmp eq/ne X)
1242     if (YKnown.countMaxTrailingZeros() == 0)
1243       return new ICmpInst(Pred, X, Cmp.getOperand(1));
1244 
1245     auto *BO0 = cast<OverflowingBinaryOperator>(Cmp.getOperand(0));
1246     if (BO0->hasNoUnsignedWrap() || BO0->hasNoSignedWrap()) {
1247       const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
1248       // `isKnownNonZero` does more analysis than just `!KnownBits.One.isZero()`
1249       // but to avoid unnecessary work, first check if this is an obvious case.
1250 
1251       // if X non-zero and NoOverflow(X * Y)
1252       //    (icmp eq/ne Y)
1253       if (!XKnown.One.isZero() || isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
1254         return new ICmpInst(Pred, Y, Cmp.getOperand(1));
1255 
1256       // if Y non-zero and NoOverflow(X * Y)
1257       //    (icmp eq/ne X)
1258       if (!YKnown.One.isZero() || isKnownNonZero(Y, DL, 0, Q.AC, Q.CxtI, Q.DT))
1259         return new ICmpInst(Pred, X, Cmp.getOperand(1));
1260     }
1261     // Note, we are skipping cases:
1262     //      if Y % 2 != 0 AND X % 2 != 0
1263     //          (false/true)
1264     //      if X non-zero and Y non-zero and NoOverflow(X * Y)
1265     //          (false/true)
1266     // Those can be simplified later as we would have already replaced the (icmp
1267     // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
1268     // will fold to a constant elsewhere.
1269   }
1270   return nullptr;
1271 }
1272 
1273 /// Fold icmp Pred X, C.
1274 /// TODO: This code structure does not make sense. The saturating add fold
1275 /// should be moved to some other helper and extended as noted below (it is also
1276 /// possible that code has been made unnecessary - do we canonicalize IR to
1277 /// overflow/saturating intrinsics or not?).
1278 Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
1279   // Match the following pattern, which is a common idiom when writing
1280   // overflow-safe integer arithmetic functions. The source performs an addition
1281   // in a wider type and explicitly checks for overflow using comparisons against
1282   // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic.
1283   //
1284   // TODO: This could probably be generalized to handle other overflow-safe
1285   // operations if we worked out the formulas to compute the appropriate magic
1286   // constants.
1287   //
1288   // sum = a + b
1289   // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
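  // (For i8 operands added in a wider type, sum+128 is in [0, 255] exactly
  // when sum is in [-128, 127], so 'sum+128 >u 255' is precisely the i8
  // signed-overflow test.)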
1290   CmpInst::Predicate Pred = Cmp.getPredicate();
1291   Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
1292   Value *A, *B;
1293   ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI
1294   if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) &&
1295       match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
1296     if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this))
1297       return Res;
1298 
1299   // icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...).
1300   Constant *C = dyn_cast<Constant>(Op1);
1301   if (!C)
1302     return nullptr;
1303 
1304   if (auto *Phi = dyn_cast<PHINode>(Op0))
1305     if (all_of(Phi->operands(), [](Value *V) { return isa<Constant>(V); })) {
1306       SmallVector<Constant *> Ops;
1307       for (Value *V : Phi->incoming_values()) {
1308         Constant *Res =
1309             ConstantFoldCompareInstOperands(Pred, cast<Constant>(V), C, DL);
1310         if (!Res)
1311           return nullptr;
1312         Ops.push_back(Res);
1313       }
1314       Builder.SetInsertPoint(Phi);
1315       PHINode *NewPhi = Builder.CreatePHI(Cmp.getType(), Phi->getNumOperands());
1316       for (auto [V, BB] : zip(Ops, Phi->blocks()))
1317         NewPhi->addIncoming(V, BB);
1318       return replaceInstUsesWith(Cmp, NewPhi);
1319     }
1320 
1321   return nullptr;
1322 }
1323 
1324 /// Canonicalize icmp instructions based on dominating conditions.
1325 Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
1326   // We already checked simple implication in InstSimplify, only handle complex
1327   // cases here.
1328   Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
1329   ICmpInst::Predicate DomPred;
1330   const APInt *C;
1331   if (!match(Y, m_APInt(C)))
1332     return nullptr;
1333 
1334   CmpInst::Predicate Pred = Cmp.getPredicate();
1335   ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *C);
1336 
1337   auto handleDomCond = [&](Value *DomCond, bool CondIsTrue) -> Instruction * {
1338     const APInt *DomC;
1339     if (!match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))))
1340       return nullptr;
1341     // We have 2 compares of a variable with constants. Calculate the constant
1342     // ranges of those compares to see if we can transform the 2nd compare:
1343     // DomBB:
1344     //   DomCond = icmp DomPred X, DomC
1345     //   br DomCond, CmpBB, FalseBB
1346     // CmpBB:
1347     //   Cmp = icmp Pred X, C
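    // For example, if DomCond = (icmp ult X, 8) is known true, then for
    // Cmp = (icmp ult X, 16) the dominating range [0,8) lies entirely inside
    // [0,16), the difference below is empty, and Cmp folds to 'true'.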
1348     if (!CondIsTrue)
1349       DomPred = CmpInst::getInversePredicate(DomPred);
1350     ConstantRange DominatingCR =
1351         ConstantRange::makeExactICmpRegion(DomPred, *DomC);
1352     ConstantRange Intersection = DominatingCR.intersectWith(CR);
1353     ConstantRange Difference = DominatingCR.difference(CR);
1354     if (Intersection.isEmptySet())
1355       return replaceInstUsesWith(Cmp, Builder.getFalse());
1356     if (Difference.isEmptySet())
1357       return replaceInstUsesWith(Cmp, Builder.getTrue());
1358 
1359     // Canonicalizing a sign bit comparison that gets used in a branch
1360     // pessimizes codegen by generating a branch-on-zero instruction instead
1361     // of a test-and-branch. So we avoid canonicalizing in such situations
1362     // because a test-and-branch instruction has better branch displacement
1363     // than a compare-and-branch instruction.
1364     bool UnusedBit;
1365     bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
1366     if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
1367       return nullptr;
1368 
1369     // Avoid an infinite loop with min/max canonicalization.
1370     // TODO: This will be unnecessary if we canonicalize to min/max intrinsics.
1371     if (Cmp.hasOneUse() &&
1372         match(Cmp.user_back(), m_MaxOrMin(m_Value(), m_Value())))
1373       return nullptr;
1374 
1375     if (const APInt *EqC = Intersection.getSingleElement())
1376       return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
1377     if (const APInt *NeC = Difference.getSingleElement())
1378       return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
1379     return nullptr;
1380   };
1381 
1382   for (BranchInst *BI : DC.conditionsFor(X)) {
1383     auto *Cond = BI->getCondition();
1384     BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
1385     if (DT.dominates(Edge0, Cmp.getParent())) {
1386       if (auto *V = handleDomCond(Cond, true))
1387         return V;
1388     } else {
1389       BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
1390       if (DT.dominates(Edge1, Cmp.getParent()))
1391         if (auto *V = handleDomCond(Cond, false))
1392           return V;
1393     }
1394   }
1395 
1396   return nullptr;
1397 }
1398 
1399 /// Fold icmp (trunc X), C.
1400 Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
1401                                                      TruncInst *Trunc,
1402                                                      const APInt &C) {
1403   ICmpInst::Predicate Pred = Cmp.getPredicate();
1404   Value *X = Trunc->getOperand(0);
1405   if (C.isOne() && C.getBitWidth() > 1) {
1406     // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
1407     Value *V = nullptr;
1408     if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
1409       return new ICmpInst(ICmpInst::ICMP_SLT, V,
1410                           ConstantInt::get(V->getType(), 1));
1411   }
1412 
1413   Type *SrcTy = X->getType();
1414   unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
1415            SrcBits = SrcTy->getScalarSizeInBits();
1416 
1417   // TODO: Handle any shifted constant by subtracting trailing zeros.
1418   // TODO: Handle non-equality predicates.
1419   Value *Y;
1420   if (Cmp.isEquality() && match(X, m_Shl(m_One(), m_Value(Y)))) {
1421     // (trunc (1 << Y) to iN) == 0 --> Y u>= N
1422     // (trunc (1 << Y) to iN) != 0 --> Y u<  N
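    // (For example, with N == 8 and a wider source: the single set bit of
    // (1 << Y) survives truncation iff Y u< 8, so the truncated value is
    // zero iff Y u>= 8.)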
1423     if (C.isZero()) {
1424       auto NewPred = (Pred == Cmp.ICMP_EQ) ? Cmp.ICMP_UGE : Cmp.ICMP_ULT;
1425       return new ICmpInst(NewPred, Y, ConstantInt::get(SrcTy, DstBits));
1426     }
1427     // (trunc (1 << Y) to iN) == 2**C --> Y == C
1428     // (trunc (1 << Y) to iN) != 2**C --> Y != C
1429     if (C.isPowerOf2())
1430       return new ICmpInst(Pred, Y, ConstantInt::get(SrcTy, C.logBase2()));
1431   }
1432 
1433   if (Cmp.isEquality() && Trunc->hasOneUse()) {
1434     // Canonicalize to a mask and wider compare if the wide type is suitable:
1435     // (trunc X to i8) == C --> (X & 0xff) == (zext C)
1436     if (!SrcTy->isVectorTy() && shouldChangeType(DstBits, SrcBits)) {
1437       Constant *Mask =
1438           ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcBits, DstBits));
1439       Value *And = Builder.CreateAnd(X, Mask);
1440       Constant *WideC = ConstantInt::get(SrcTy, C.zext(SrcBits));
1441       return new ICmpInst(Pred, And, WideC);
1442     }
1443 
1444     // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
1445     // of the high bits truncated out of x are known.
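    // For example, if X is i16 and its high 8 bits are known to be 0x12, then
    // (trunc X to i8) == 42 becomes X == 0x122A.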
1446     KnownBits Known = computeKnownBits(X, 0, &Cmp);
1447 
1448     // If all the high bits are known, we can do this xform.
1449     if ((Known.Zero | Known.One).countl_one() >= SrcBits - DstBits) {
1450       // Pull in the high bits from known-ones set.
1451       APInt NewRHS = C.zext(SrcBits);
1452       NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
1453       return new ICmpInst(Pred, X, ConstantInt::get(SrcTy, NewRHS));
1454     }
1455   }
1456 
1457   // Look through truncated right-shift of the sign-bit for a sign-bit check:
1458   // trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] < 0  --> ShOp <  0
1459   // trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] > -1 --> ShOp > -1
1460   Value *ShOp;
1461   const APInt *ShAmtC;
1462   bool TrueIfSigned;
1463   if (isSignBitCheck(Pred, C, TrueIfSigned) &&
1464       match(X, m_Shr(m_Value(ShOp), m_APInt(ShAmtC))) &&
1465       DstBits == SrcBits - ShAmtC->getZExtValue()) {
1466     return TrueIfSigned ? new ICmpInst(ICmpInst::ICMP_SLT, ShOp,
1467                                        ConstantInt::getNullValue(SrcTy))
1468                         : new ICmpInst(ICmpInst::ICMP_SGT, ShOp,
1469                                        ConstantInt::getAllOnesValue(SrcTy));
1470   }
1471 
1472   return nullptr;
1473 }
1474 
1475 /// Fold icmp (trunc X), (trunc Y).
1476 /// Fold icmp (trunc X), (zext Y).
1477 Instruction *
1478 InstCombinerImpl::foldICmpTruncWithTruncOrExt(ICmpInst &Cmp,
1479                                               const SimplifyQuery &Q) {
1480   if (Cmp.isSigned())
1481     return nullptr;
1482 
1483   Value *X, *Y;
1484   ICmpInst::Predicate Pred;
1485   bool YIsZext = false;
1486   // Try to match icmp (trunc X), (trunc Y)
1487   if (match(&Cmp, m_ICmp(Pred, m_Trunc(m_Value(X)), m_Trunc(m_Value(Y))))) {
1488     if (X->getType() != Y->getType() &&
1489         (!Cmp.getOperand(0)->hasOneUse() || !Cmp.getOperand(1)->hasOneUse()))
1490       return nullptr;
1491     if (!isDesirableIntType(X->getType()->getScalarSizeInBits()) &&
1492         isDesirableIntType(Y->getType()->getScalarSizeInBits())) {
1493       std::swap(X, Y);
1494       Pred = Cmp.getSwappedPredicate(Pred);
1495     }
1496   }
1497   // Try to match icmp (trunc X), (zext Y)
1498   else if (match(&Cmp, m_c_ICmp(Pred, m_Trunc(m_Value(X)),
1499                                 m_OneUse(m_ZExt(m_Value(Y))))))
1501     YIsZext = true;
1502   else
1503     return nullptr;
1504 
1505   Type *TruncTy = Cmp.getOperand(0)->getType();
1506   unsigned TruncBits = TruncTy->getScalarSizeInBits();
1507 
1508   // If this transform will end up changing from desirable types -> undesirable
1509   // types, skip it.
1510   if (isDesirableIntType(TruncBits) &&
1511       !isDesirableIntType(X->getType()->getScalarSizeInBits()))
1512     return nullptr;
1513 
1514   // Check if the trunc is unneeded.
1515   KnownBits KnownX = llvm::computeKnownBits(X, /*Depth*/ 0, Q);
1516   if (KnownX.countMaxActiveBits() > TruncBits)
1517     return nullptr;
1518 
1519   if (!YIsZext) {
1520     // If Y is also a trunc, make sure it is unneeded.
1521     KnownBits KnownY = llvm::computeKnownBits(Y, /*Depth*/ 0, Q);
1522     if (KnownY.countMaxActiveBits() > TruncBits)
1523       return nullptr;
1524   }
1525 
1526   Value *NewY = Builder.CreateZExtOrTrunc(Y, X->getType());
1527   return new ICmpInst(Pred, X, NewY);
1528 }
1529 
1530 /// Fold icmp (xor X, Y), C.
1531 Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp,
1532                                                    BinaryOperator *Xor,
1533                                                    const APInt &C) {
1534   if (Instruction *I = foldICmpXorShiftConst(Cmp, Xor, C))
1535     return I;
1536 
1537   Value *X = Xor->getOperand(0);
1538   Value *Y = Xor->getOperand(1);
1539   const APInt *XorC;
1540   if (!match(Y, m_APInt(XorC)))
1541     return nullptr;
1542 
1543   // If this is a comparison that tests the sign bit (X < 0) or (X > -1),
1544   // fold the xor.
1545   ICmpInst::Predicate Pred = Cmp.getPredicate();
1546   bool TrueIfSigned = false;
1547   if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) {
1549     // If the sign bit of XorC is not set, the xor does not change the sign
1550     // bit of X, so just stop using the Xor and test X directly.
1551     if (!XorC->isNegative())
1552       return replaceOperand(Cmp, 0, X);
1553 
1554     // Emit the opposite comparison.
1555     if (TrueIfSigned)
1556       return new ICmpInst(ICmpInst::ICMP_SGT, X,
1557                           ConstantInt::getAllOnesValue(X->getType()));
1558     else
1559       return new ICmpInst(ICmpInst::ICMP_SLT, X,
1560                           ConstantInt::getNullValue(X->getType()));
1561   }
1562 
1563   if (Xor->hasOneUse()) {
1564     // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
1565     if (!Cmp.isEquality() && XorC->isSignMask()) {
1566       Pred = Cmp.getFlippedSignednessPredicate();
1567       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1568     }
1569 
1570     // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
1571     if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
1572       Pred = Cmp.getFlippedSignednessPredicate();
1573       Pred = Cmp.getSwappedPredicate(Pred);
1574       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1575     }
1576   }
1577 
1578   // Mask constant magic can eliminate an 'xor' with unsigned compares.
1579   if (Pred == ICmpInst::ICMP_UGT) {
1580     // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
1581     if (*XorC == ~C && (C + 1).isPowerOf2())
1582       return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
1583     // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
1584     if (*XorC == C && (C + 1).isPowerOf2())
1585       return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
1586   }
1587   if (Pred == ICmpInst::ICMP_ULT) {
1588     // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
1589     if (*XorC == -C && C.isPowerOf2())
1590       return new ICmpInst(ICmpInst::ICMP_UGT, X,
1591                           ConstantInt::get(X->getType(), ~C));
1592     // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
1593     if (*XorC == C && (-C).isPowerOf2())
1594       return new ICmpInst(ICmpInst::ICMP_UGT, X,
1595                           ConstantInt::get(X->getType(), ~C));
1596   }
1597   return nullptr;
1598 }
1599 
1600 /// For power-of-2 C:
1601 /// ((X s>> ShiftC) ^ X) u< C --> (X + C) u< (C << 1)
1602 /// ((X s>> ShiftC) ^ X) u> (C - 1) --> (X + C) u> ((C << 1) - 1)
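/// E.g. for i32 with ShiftC == 31 and C == 8: (X s>> 31) ^ X yields X for
/// non-negative X and ~X for negative X, so the 'u< 8' test holds iff X is in
/// [-8, 8), which is exactly (X + 8) u< 16.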
1603 Instruction *InstCombinerImpl::foldICmpXorShiftConst(ICmpInst &Cmp,
1604                                                      BinaryOperator *Xor,
1605                                                      const APInt &C) {
1606   CmpInst::Predicate Pred = Cmp.getPredicate();
1607   APInt PowerOf2;
1608   if (Pred == ICmpInst::ICMP_ULT)
1609     PowerOf2 = C;
1610   else if (Pred == ICmpInst::ICMP_UGT && !C.isMaxValue())
1611     PowerOf2 = C + 1;
1612   else
1613     return nullptr;
1614   if (!PowerOf2.isPowerOf2())
1615     return nullptr;
1616   Value *X;
1617   const APInt *ShiftC;
1618   if (!match(Xor, m_OneUse(m_c_Xor(m_Value(X),
1619                                    m_AShr(m_Deferred(X), m_APInt(ShiftC))))))
1620     return nullptr;
1621   uint64_t Shift = ShiftC->getLimitedValue();
1622   Type *XType = X->getType();
1623   if (Shift == 0 || PowerOf2.isMinSignedValue())
1624     return nullptr;
1625   Value *Add = Builder.CreateAdd(X, ConstantInt::get(XType, PowerOf2));
1626   APInt Bound =
1627       Pred == ICmpInst::ICMP_ULT ? PowerOf2 << 1 : ((PowerOf2 << 1) - 1);
1628   return new ICmpInst(Pred, Add, ConstantInt::get(XType, Bound));
1629 }
1630 
1631 /// Fold icmp (and (sh X, Y), C2), C1.
1632 Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp,
1633                                                 BinaryOperator *And,
1634                                                 const APInt &C1,
1635                                                 const APInt &C2) {
1636   BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
1637   if (!Shift || !Shift->isShift())
1638     return nullptr;
1639 
1640   // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
1641   // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
1642   // code produced by the clang front-end, for bitfield access.
1643   // This seemingly simple opportunity to fold away a shift turns out to be
1644   // rather complicated. See PR17827 for details.
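  // For example, ((X >> 3) & 15) == 2 becomes (X & (15 << 3)) == (2 << 3),
  // i.e. (X & 120) == 16, eliminating the shift.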
1645   unsigned ShiftOpcode = Shift->getOpcode();
1646   bool IsShl = ShiftOpcode == Instruction::Shl;
1647   const APInt *C3;
1648   if (match(Shift->getOperand(1), m_APInt(C3))) {
1649     APInt NewAndCst, NewCmpCst;
1650     bool AnyCmpCstBitsShiftedOut;
1651     if (ShiftOpcode == Instruction::Shl) {
1652       // For a left shift, we can fold if the comparison is not signed. We can
1653       // also fold a signed comparison if the mask value and comparison value
1654       // are not negative. These constraints may not be obvious, but we can
1655       // prove that they are correct using an SMT solver.
1656       if (Cmp.isSigned() && (C2.isNegative() || C1.isNegative()))
1657         return nullptr;
1658 
1659       NewCmpCst = C1.lshr(*C3);
1660       NewAndCst = C2.lshr(*C3);
1661       AnyCmpCstBitsShiftedOut = NewCmpCst.shl(*C3) != C1;
1662     } else if (ShiftOpcode == Instruction::LShr) {
1663       // For a logical right shift, we can fold if the comparison is not signed.
1664       // We can also fold a signed comparison if the shifted mask value and the
1665       // shifted comparison value are not negative. These constraints may not be
1666       // obvious, but we can prove that they are correct using an SMT solver.
1667       NewCmpCst = C1.shl(*C3);
1668       NewAndCst = C2.shl(*C3);
1669       AnyCmpCstBitsShiftedOut = NewCmpCst.lshr(*C3) != C1;
1670       if (Cmp.isSigned() && (NewAndCst.isNegative() || NewCmpCst.isNegative()))
1671         return nullptr;
1672     } else {
1673       // For an arithmetic shift, check that both constants don't use (in a
1674       // signed sense) the top bits being shifted out.
1675       assert(ShiftOpcode == Instruction::AShr && "Unknown shift opcode");
1676       NewCmpCst = C1.shl(*C3);
1677       NewAndCst = C2.shl(*C3);
1678       AnyCmpCstBitsShiftedOut = NewCmpCst.ashr(*C3) != C1;
1679       if (NewAndCst.ashr(*C3) != C2)
1680         return nullptr;
1681     }
1682 
1683     if (AnyCmpCstBitsShiftedOut) {
1684       // If we shifted bits out, the fold is not going to work out. As a
1685       // special case, check to see if this means that the result is always
1686       // true or false now.
1687       if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
1688         return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
1689       if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
1690         return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
1691     } else {
1692       Value *NewAnd = Builder.CreateAnd(
1693           Shift->getOperand(0), ConstantInt::get(And->getType(), NewAndCst));
1694       return new ICmpInst(Cmp.getPredicate(),
1695           NewAnd, ConstantInt::get(And->getType(), NewCmpCst));
1696     }
1697   }
1698 
1699   // Turn ((X >> Y) & C2) == 0  into  (X & (C2 << Y)) == 0.  The latter is
1700   // preferable because it allows the C2 << Y expression to be hoisted out of a
1701   // loop if Y is invariant and X is not.
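  // When the original shift is 'shl', the mask moves the other way:
  // ((X << Y) & C2) == 0 becomes (X & (C2 >> Y)) == 0, hence the lshr/shl
  // choice below.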
1702   if (Shift->hasOneUse() && C1.isZero() && Cmp.isEquality() &&
1703       !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
1704     // Compute C2 << Y.
1705     Value *NewShift =
1706         IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
1707               : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
1708 
1709     // Compute X & (C2 << Y).
1710     Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
1711     return replaceOperand(Cmp, 0, NewAnd);
1712   }
1713 
1714   return nullptr;
1715 }
1716 
1717 /// Fold icmp (and X, C2), C1.
1718 Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
1719                                                      BinaryOperator *And,
1720                                                      const APInt &C1) {
1721   bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
1722 
1723   // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
1724   // TODO: We canonicalize to the longer form for scalars because we have
1725   // better analysis/folds for icmp, and codegen may be better with icmp.
1726   if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isZero() &&
1727       match(And->getOperand(1), m_One()))
1728     return new TruncInst(And->getOperand(0), Cmp.getType());
1729 
1730   const APInt *C2;
1731   Value *X;
1732   if (!match(And, m_And(m_Value(X), m_APInt(C2))))
1733     return nullptr;
1734 
1735   // Don't perform the following transforms if the AND has multiple uses.
1736   if (!And->hasOneUse())
1737     return nullptr;
1738 
1739   if (Cmp.isEquality() && C1.isZero()) {
1740     // Restrict this fold to single-use 'and' (PR10267).
1741     // Replace ((and X, (1 << size(X)-1)) != 0) with (X s< 0)
1742     if (C2->isSignMask()) {
1743       Constant *Zero = Constant::getNullValue(X->getType());
1744       auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
1745       return new ICmpInst(NewPred, X, Zero);
1746     }
1747 
1748     KnownBits Know = computeKnownBits(And->getOperand(0), 0, And);
1749     // Set high zeros of C2 to allow matching negated power-of-2.
1750     APInt NewC2 = *C2 | APInt::getHighBitsSet(C2->getBitWidth(),
1751                                               Know.countMinLeadingZeros());
1753 
1754     // Restrict this fold to single-use 'and' (PR10267).
1755     // ((%x & C) == 0) --> %x u< (-C)  iff (-C) is power of two.
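    // For example, with i8: (X & 0xF0) == 0 becomes X u< 16, since -0xF0 is 16.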
1756     if (NewC2.isNegatedPowerOf2()) {
1757       Constant *NegBOC = ConstantInt::get(And->getType(), -NewC2);
1758       auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
1759       return new ICmpInst(NewPred, X, NegBOC);
1760     }
1761   }
1762 
1763   // If the LHS is an 'and' of a truncate and we can widen the and/compare to
1764   // the input width without changing the value produced, eliminate the cast:
1765   //
1766   // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
1767   //
1768   // We can do this transformation if the constants do not have their sign bits
1769   // set or if it is an equality comparison. Extending a relational comparison
1770   // when we're checking the sign bit would not work.
1771   Value *W;
1772   if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) &&
1773       (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) {
1774     // TODO: Is this a good transform for vectors? Wider types may reduce
1775     // throughput. Should this transform be limited (even for scalars) by using
1776     // shouldChangeType()?
1777     if (!Cmp.getType()->isVectorTy()) {
1778       Type *WideType = W->getType();
1779       unsigned WideScalarBits = WideType->getScalarSizeInBits();
1780       Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits));
1781       Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
1782       Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
1783       return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
1784     }
1785   }
1786 
1787   if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2))
1788     return I;
1789 
1790   // (icmp pred (and (or (lshr A, B), A), 1), 0) -->
1791   // (icmp pred (and A, (or (shl 1, B), 1)), 0)
1792   //
1793   // iff pred isn't signed.
1794   if (!Cmp.isSigned() && C1.isZero() && And->getOperand(0)->hasOneUse() &&
1795       match(And->getOperand(1), m_One())) {
1796     Constant *One = cast<Constant>(And->getOperand(1));
1797     Value *Or = And->getOperand(0);
1798     Value *A, *B, *LShr;
1799     if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
1800         match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
1801       unsigned UsesRemoved = 0;
1802       if (And->hasOneUse())
1803         ++UsesRemoved;
1804       if (Or->hasOneUse())
1805         ++UsesRemoved;
1806       if (LShr->hasOneUse())
1807         ++UsesRemoved;
1808 
1809       // Compute A & ((1 << B) | 1)
1810       unsigned RequireUsesRemoved = match(B, m_ImmConstant()) ? 1 : 3;
1811       if (UsesRemoved >= RequireUsesRemoved) {
1812         Value *NewOr =
1813             Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
1814                                                /*HasNUW=*/true),
1815                              One, Or->getName());
1816         Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
1817         return replaceOperand(Cmp, 0, NewAnd);
1818       }
1819     }
1820   }
1821 
1822   return nullptr;
1823 }
1824 
1825 /// Fold icmp (and X, Y), C.
1826 Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
1827                                                    BinaryOperator *And,
1828                                                    const APInt &C) {
1829   if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
1830     return I;
1831 
1832   const ICmpInst::Predicate Pred = Cmp.getPredicate();
1833   bool TrueIfNeg;
1834   if (isSignBitCheck(Pred, C, TrueIfNeg)) {
1835     // ((X - 1) & ~X) <  0 --> X == 0
1836     // ((X - 1) & ~X) >= 0 --> X != 0
1837     Value *X;
1838     if (match(And->getOperand(0), m_Add(m_Value(X), m_AllOnes())) &&
1839         match(And->getOperand(1), m_Not(m_Specific(X)))) {
1840       auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
1841       return new ICmpInst(NewPred, X, ConstantInt::getNullValue(X->getType()));
1842     }
1843     // (X & X) <  0 --> X == MinSignedC
1844     // (X & X) > -1 --> X != MinSignedC
1845     if (match(And, m_c_And(m_Neg(m_Value(X)), m_Deferred(X)))) {
1846       Constant *MinSignedC = ConstantInt::get(
1847           X->getType(),
1848           APInt::getSignedMinValue(X->getType()->getScalarSizeInBits()));
1849       auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
1850       return new ICmpInst(NewPred, X, MinSignedC);
1851     }
1852   }
1853 
1854   // TODO: These all require that Y is constant too, so refactor with the above.
1855 
1856   // Try to optimize things like "A[i] & 42 == 0" to index computations.
1857   Value *X = And->getOperand(0);
1858   Value *Y = And->getOperand(1);
1859   if (auto *C2 = dyn_cast<ConstantInt>(Y))
1860     if (auto *LI = dyn_cast<LoadInst>(X))
1861       if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
1862         if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
1863           if (Instruction *Res =
1864                   foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2))
1865             return Res;
1866 
1867   if (!Cmp.isEquality())
1868     return nullptr;
1869 
1870   // (X & -C) == -C -> X u>  ~C
1871   // (X & -C) != -C -> X u<= ~C
1872   //   iff C is a power of 2
1873   if (Cmp.getOperand(1) == Y && C.isNegatedPowerOf2()) {
1874     auto NewPred =
1875         Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1876     return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
1877   }
1878 
1879   // If we are testing the intersection of 2 select-of-nonzero-constants with no
1880   // common bits set, it's the same as checking if exactly one select condition
1881   // is set:
1882   // ((A ? TC : FC) & (B ? TC : FC)) == 0 --> xor A, B
1883   // ((A ? TC : FC) & (B ? TC : FC)) != 0 --> not(xor A, B)
1884   // TODO: Generalize for non-constant values.
1885   // TODO: Handle signed/unsigned predicates.
1886   // TODO: Handle other bitwise logic connectors.
1887   // TODO: Extend to handle a non-zero compare constant.
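  // For example, with TC = 1 and FC = 2: (A ? 1 : 2) & (B ? 1 : 2) is zero iff
  // the selects choose different constants (1 & 2 == 0), i.e. iff A != B,
  // which is (xor A, B).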
1888   if (C.isZero() && (Pred == CmpInst::ICMP_EQ || And->hasOneUse())) {
1889     assert(Cmp.isEquality() && "Not expecting non-equality predicates");
1890     Value *A, *B;
1891     const APInt *TC, *FC;
1892     if (match(X, m_Select(m_Value(A), m_APInt(TC), m_APInt(FC))) &&
1893         match(Y,
1894               m_Select(m_Value(B), m_SpecificInt(*TC), m_SpecificInt(*FC))) &&
1895         !TC->isZero() && !FC->isZero() && !TC->intersects(*FC)) {
1896       Value *R = Builder.CreateXor(A, B);
1897       if (Pred == CmpInst::ICMP_NE)
1898         R = Builder.CreateNot(R);
1899       return replaceInstUsesWith(Cmp, R);
1900     }
1901   }
1902 
1903   // ((zext i1 X) & Y) == 0 --> !((trunc Y) & X)
1904   // ((zext i1 X) & Y) != 0 -->  ((trunc Y) & X)
1905   // ((zext i1 X) & Y) == 1 -->  ((trunc Y) & X)
1906   // ((zext i1 X) & Y) != 1 --> !((trunc Y) & X)
1907   if (match(And, m_OneUse(m_c_And(m_OneUse(m_ZExt(m_Value(X))), m_Value(Y)))) &&
1908       X->getType()->isIntOrIntVectorTy(1) && (C.isZero() || C.isOne())) {
1909     Value *TruncY = Builder.CreateTrunc(Y, X->getType());
1910     if (C.isZero() ^ (Pred == CmpInst::ICMP_NE)) {
1911       Value *And = Builder.CreateAnd(TruncY, X);
1912       return BinaryOperator::CreateNot(And);
1913     }
1914     return BinaryOperator::CreateAnd(TruncY, X);
1915   }
1916 
1917   return nullptr;
1918 }
1919 
1920 /// Fold icmp eq/ne (or (xor/sub (X1, X2), xor/sub (X3, X4))), 0.
1921 static Value *foldICmpOrXorSubChain(ICmpInst &Cmp, BinaryOperator *Or,
1922                                     InstCombiner::BuilderTy &Builder) {
1923   // Are we using xors or subs to bitwise check for a pair or pairs of
1924   // (in)equalities? Convert to a shorter form that has more potential to be
1925   // folded even further.
1926   // ((X1 ^/- X2) || (X3 ^/- X4)) == 0 --> (X1 == X2) && (X3 == X4)
1927   // ((X1 ^/- X2) || (X3 ^/- X4)) != 0 --> (X1 != X2) || (X3 != X4)
1928   // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) == 0 -->
1929   // (X1 == X2) && (X3 == X4) && (X5 == X6)
1930   // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) != 0 -->
1931   // (X1 != X2) || (X3 != X4) || (X5 != X6)
1932   SmallVector<std::pair<Value *, Value *>, 2> CmpValues;
1933   SmallVector<Value *, 16> WorkList(1, Or);
1934 
1935   while (!WorkList.empty()) {
1936     auto MatchOrOperatorArgument = [&](Value *OrOperatorArgument) {
1937       Value *Lhs, *Rhs;
1938 
1939       if (match(OrOperatorArgument,
1940                 m_OneUse(m_Xor(m_Value(Lhs), m_Value(Rhs))))) {
1941         CmpValues.emplace_back(Lhs, Rhs);
1942         return;
1943       }
1944 
1945       if (match(OrOperatorArgument,
1946                 m_OneUse(m_Sub(m_Value(Lhs), m_Value(Rhs))))) {
1947         CmpValues.emplace_back(Lhs, Rhs);
1948         return;
1949       }
1950 
1951       WorkList.push_back(OrOperatorArgument);
1952     };
1953 
1954     Value *CurrentValue = WorkList.pop_back_val();
1955     Value *OrOperatorLhs, *OrOperatorRhs;
1956 
1957     if (!match(CurrentValue,
1958                m_Or(m_Value(OrOperatorLhs), m_Value(OrOperatorRhs)))) {
1959       return nullptr;
1960     }
1961 
1962     MatchOrOperatorArgument(OrOperatorRhs);
1963     MatchOrOperatorArgument(OrOperatorLhs);
1964   }
1965 
1966   ICmpInst::Predicate Pred = Cmp.getPredicate();
1967   auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1968   Value *LhsCmp = Builder.CreateICmp(Pred, CmpValues.rbegin()->first,
1969                                      CmpValues.rbegin()->second);
1970 
1971   for (auto It = CmpValues.rbegin() + 1; It != CmpValues.rend(); ++It) {
1972     Value *RhsCmp = Builder.CreateICmp(Pred, It->first, It->second);
1973     LhsCmp = Builder.CreateBinOp(BOpc, LhsCmp, RhsCmp);
1974   }
1975 
1976   return LhsCmp;
1977 }
1978 
1979 /// Fold icmp (or X, Y), C.
1980 Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
1981                                                   BinaryOperator *Or,
1982                                                   const APInt &C) {
1983   ICmpInst::Predicate Pred = Cmp.getPredicate();
1984   if (C.isOne()) {
1985     // icmp slt signum(V) 1 --> icmp slt V, 1
1986     Value *V = nullptr;
1987     if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
1988       return new ICmpInst(ICmpInst::ICMP_SLT, V,
1989                           ConstantInt::get(V->getType(), 1));
1990   }
1991 
1992   Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1);
1993   const APInt *MaskC;
1994   if (match(OrOp1, m_APInt(MaskC)) && Cmp.isEquality()) {
1995     if (*MaskC == C && (C + 1).isPowerOf2()) {
1996       // X | C == C --> X <=u C
1997       // X | C != C --> X  >u C
1998       //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
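      // For example, (X | 7) == 7 holds iff X has no bits set above the low
      // three, i.e. iff X u<= 7.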
1999       Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
2000       return new ICmpInst(Pred, OrOp0, OrOp1);
2001     }
2002 
2003     // More general: canonicalize 'equality with set bits mask' to
2004     // 'equality with clear bits mask'.
2005     // (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC
2006     // (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC
2007     if (Or->hasOneUse()) {
2008       Value *And = Builder.CreateAnd(OrOp0, ~(*MaskC));
2009       Constant *NewC = ConstantInt::get(Or->getType(), C ^ (*MaskC));
2010       return new ICmpInst(Pred, And, NewC);
2011     }
2012   }
2013 
2014   // (X | (X-1)) s<  0 --> X s< 1
2015   // (X | (X-1)) s> -1 --> X s> 0
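  // (X | (X-1) smears the lowest set bit down to bit 0, so its sign bit is
  // set iff X is negative or X is zero; hence the compare against 1 or 0.)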
2016   Value *X;
2017   bool TrueIfSigned;
2018   if (isSignBitCheck(Pred, C, TrueIfSigned) &&
2019       match(Or, m_c_Or(m_Add(m_Value(X), m_AllOnes()), m_Deferred(X)))) {
2020     auto NewPred = TrueIfSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGT;
2021     Constant *NewC = ConstantInt::get(X->getType(), TrueIfSigned ? 1 : 0);
2022     return new ICmpInst(NewPred, X, NewC);
2023   }
2024 
2025   const APInt *OrC;
2026   // icmp(X | OrC, C) --> icmp(X, 0)
2027   if (C.isNonNegative() && match(Or, m_Or(m_Value(X), m_APInt(OrC)))) {
2028     switch (Pred) {
2029     // X | OrC s< C --> X s< 0 iff OrC s>= C s>= 0
2030     case ICmpInst::ICMP_SLT:
2031     // X | OrC s>= C --> X s>= 0 iff OrC s>= C s>= 0
2032     case ICmpInst::ICMP_SGE:
2033       if (OrC->sge(C))
2034         return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
2035       break;
2036     // X | OrC s<= C --> X s< 0 iff OrC s> C s>= 0
2037     case ICmpInst::ICMP_SLE:
2038     // X | OrC s> C --> X s>= 0 iff OrC s> C s>= 0
2039     case ICmpInst::ICMP_SGT:
2040       if (OrC->sgt(C))
2041         return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), X,
2042                             ConstantInt::getNullValue(X->getType()));
2043       break;
2044     default:
2045       break;
2046     }
2047   }
2048 
2049   if (!Cmp.isEquality() || !C.isZero() || !Or->hasOneUse())
2050     return nullptr;
2051 
2052   Value *P, *Q;
2053   if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
2054     // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
2055     // -> and (icmp eq P, null), (icmp eq Q, null).
2056     Value *CmpP =
2057         Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
2058     Value *CmpQ =
2059         Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
2060     auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
2061     return BinaryOperator::Create(BOpc, CmpP, CmpQ);
2062   }
2063 
2064   if (Value *V = foldICmpOrXorSubChain(Cmp, Or, Builder))
2065     return replaceInstUsesWith(Cmp, V);
2066 
2067   return nullptr;
2068 }
2069 
2070 /// Fold icmp (mul X, Y), C.
2071 Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
2072                                                    BinaryOperator *Mul,
2073                                                    const APInt &C) {
2074   ICmpInst::Predicate Pred = Cmp.getPredicate();
2075   Type *MulTy = Mul->getType();
2076   Value *X = Mul->getOperand(0);
2077 
2078   // If there's no overflow:
2079   // X * X == 0 --> X == 0
2080   // X * X != 0 --> X != 0
2081   if (Cmp.isEquality() && C.isZero() && X == Mul->getOperand(1) &&
2082       (Mul->hasNoUnsignedWrap() || Mul->hasNoSignedWrap()))
2083     return new ICmpInst(Pred, X, ConstantInt::getNullValue(MulTy));
2084 
2085   const APInt *MulC;
2086   if (!match(Mul->getOperand(1), m_APInt(MulC)))
2087     return nullptr;
2088 
2089   // If this is a test of the sign bit and the multiply is sign-preserving with
2090   // a constant operand, use the multiply LHS operand instead:
2091   // (X * +MulC) < 0 --> X < 0
2092   // (X * -MulC) < 0 --> X > 0
2093   if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) {
2094     if (MulC->isNegative())
2095       Pred = ICmpInst::getSwappedPredicate(Pred);
2096     return new ICmpInst(Pred, X, ConstantInt::getNullValue(MulTy));
2097   }
2098 
2099   if (MulC->isZero())
2100     return nullptr;
2101 
2102   // If the multiply does not wrap or the constant is odd, try to divide the
2103   // compare constant by the multiplication factor.
2104   if (Cmp.isEquality()) {
2105     // (mul nsw X, MulC) eq/ne C --> X eq/ne C /s MulC
2106     if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) {
2107       Constant *NewC = ConstantInt::get(MulTy, C.sdiv(*MulC));
2108       return new ICmpInst(Pred, X, NewC);
2109     }
2110 
2111     // Checking C % MulC == 0 is more restrictive than necessary when MulC is
2112     // odd, because the transform is correct whenever MulC * N == C, even with
2113     // overflow. E.g. with i8: (icmp eq (mul X, 5), 101) -> (icmp eq X, 225),
2114     // but since 101 % 5 != 0, we miss that case.
2115     if (C.urem(*MulC).isZero()) {
2116       // (mul nuw X, MulC) eq/ne C --> X eq/ne C /u MulC
2117       // (mul X, OddC) eq/ne N * C --> X eq/ne N
2118       if ((*MulC & 1).isOne() || Mul->hasNoUnsignedWrap()) {
2119         Constant *NewC = ConstantInt::get(MulTy, C.udiv(*MulC));
2120         return new ICmpInst(Pred, X, NewC);
2121       }
2122     }
2123   }
2124 
2125   // With a matching no-overflow guarantee, fold the constants:
2126   // (X * MulC) < C --> X < (C / MulC)
2127   // (X * MulC) > C --> X > (C / MulC)
2128   // TODO: Assert that Pred is not equal to SGE, SLE, UGE, ULE?
2129   Constant *NewC = nullptr;
2130   if (Mul->hasNoSignedWrap() && ICmpInst::isSigned(Pred)) {
2131     // MININT / -1 --> overflow.
2132     if (C.isMinSignedValue() && MulC->isAllOnes())
2133       return nullptr;
2134     if (MulC->isNegative())
2135       Pred = ICmpInst::getSwappedPredicate(Pred);
2136 
2137     if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
2138       NewC = ConstantInt::get(
2139           MulTy, APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::UP));
2140     } else {
2141       assert((Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT) &&
2142              "Unexpected predicate");
2143       NewC = ConstantInt::get(
2144           MulTy, APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::DOWN));
2145     }
2146   } else if (Mul->hasNoUnsignedWrap() && ICmpInst::isUnsigned(Pred)) {
2147     if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) {
2148       NewC = ConstantInt::get(
2149           MulTy, APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::UP));
2150     } else {
2151       assert((Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) &&
2152              "Unexpected predicate");
2153       NewC = ConstantInt::get(
2154           MulTy, APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::DOWN));
2155     }
2156   }
2157 
2158   return NewC ? new ICmpInst(Pred, X, NewC) : nullptr;
2159 }
2160 
2161 /// Fold icmp (shl 1, Y), C.
2162 static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
2163                                    const APInt &C) {
2164   Value *Y;
2165   if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
2166     return nullptr;
2167 
2168   Type *ShiftType = Shl->getType();
2169   unsigned TypeBits = C.getBitWidth();
2170   bool CIsPowerOf2 = C.isPowerOf2();
2171   ICmpInst::Predicate Pred = Cmp.getPredicate();
2172   if (Cmp.isUnsigned()) {
2173     // (1 << Y) pred C -> Y pred Log2(C)
2174     if (!CIsPowerOf2) {
2175       // (1 << Y) <  30 -> Y <= 4
2176       // (1 << Y) <= 30 -> Y <= 4
2177       // (1 << Y) >= 30 -> Y >  4
2178       // (1 << Y) >  30 -> Y >  4
2179       if (Pred == ICmpInst::ICMP_ULT)
2180         Pred = ICmpInst::ICMP_ULE;
2181       else if (Pred == ICmpInst::ICMP_UGE)
2182         Pred = ICmpInst::ICMP_UGT;
2183     }
2184 
2185     unsigned CLog2 = C.logBase2();
2186     return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
2187   } else if (Cmp.isSigned()) {
2188     Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
2189     // (1 << Y) >  0 -> Y != 31
2190     // (1 << Y) >  C -> Y != 31 if C is negative.
2191     if (Pred == ICmpInst::ICMP_SGT && C.sle(0))
2192       return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
2193 
2194     // (1 << Y) <  0 -> Y == 31
2195     // (1 << Y) <  1 -> Y == 31
2196     // (1 << Y) <  C -> Y == 31 if C is negative and not signed min.
2197     // Exclude signed min by subtracting 1 and lower the upper bound to 0.
2198     if (Pred == ICmpInst::ICMP_SLT && (C-1).sle(0))
2199       return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
2200   }
2201 
2202   return nullptr;
2203 }
2204 
2205 /// Fold icmp (shl X, Y), C.
2206 Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
2207                                                    BinaryOperator *Shl,
2208                                                    const APInt &C) {
2209   const APInt *ShiftVal;
2210   if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
2211     return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
2212 
2213   ICmpInst::Predicate Pred = Cmp.getPredicate();
2214   // (icmp pred (shl nuw&nsw X, Y), Csle0)
2215   //      -> (icmp pred X, Csle0)
2216   //
2217   // The idea is that the nuw/nsw flags essentially freeze the sign bit for the
2218   // shift op, so X is what must be used in the compare.
2219   if (C.sle(0) && Shl->hasNoUnsignedWrap() && Shl->hasNoSignedWrap())
2220     return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
2221 
2222   // (icmp eq/ne (shl nuw|nsw X, Y), 0)
2223   //      -> (icmp eq/ne X, 0)
2224   if (ICmpInst::isEquality(Pred) && C.isZero() &&
2225       (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap()))
2226     return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
2227 
2228   // (icmp slt (shl nsw X, Y), 0/1)
2229   //      -> (icmp slt X, 0/1)
2230   // (icmp sgt (shl nsw X, Y), 0/-1)
2231   //      -> (icmp sgt X, 0/-1)
2232   //
2233   // NB: sge/sle with a constant will canonicalize to sgt/slt.
2234   if (Shl->hasNoSignedWrap() &&
2235       (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT))
2236     if (C.isZero() || (Pred == ICmpInst::ICMP_SGT ? C.isAllOnes() : C.isOne()))
2237       return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
2238 
2239   const APInt *ShiftAmt;
2240   if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
2241     return foldICmpShlOne(Cmp, Shl, C);
2242 
2243   // Check that the shift amount is in range. If not, don't perform undefined
2244   // shifts. When the shift is visited, it will be simplified.
2245   unsigned TypeBits = C.getBitWidth();
2246   if (ShiftAmt->uge(TypeBits))
2247     return nullptr;
2248 
2249   Value *X = Shl->getOperand(0);
2250   Type *ShType = Shl->getType();
2251 
2252   // NSW guarantees that we are only shifting out sign bits from the high bits,
2253   // so we can ASHR the compare constant without needing a mask and eliminate
2254   // the shift.
2255   if (Shl->hasNoSignedWrap()) {
2256     if (Pred == ICmpInst::ICMP_SGT) {
2257       // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
2258       APInt ShiftedC = C.ashr(*ShiftAmt);
2259       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2260     }
2261     if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2262         C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) {
2263       APInt ShiftedC = C.ashr(*ShiftAmt);
2264       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2265     }
2266     if (Pred == ICmpInst::ICMP_SLT) {
2267       // SLE is the same as above, but SLE is canonicalized to SLT, so convert:
2268       // (X << S) <=s C is equiv to X <=s (C >> S) for all C
2269       // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
2270       // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
2271       assert(!C.isMinSignedValue() && "Unexpected icmp slt");
2272       APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
2273       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2274     }
2275   }
2276 
2277   // NUW guarantees that we are only shifting out zero bits from the high bits,
2278   // so we can LSHR the compare constant without needing a mask and eliminate
2279   // the shift.
2280   if (Shl->hasNoUnsignedWrap()) {
2281     if (Pred == ICmpInst::ICMP_UGT) {
2282       // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
2283       APInt ShiftedC = C.lshr(*ShiftAmt);
2284       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2285     }
2286     if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2287         C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) {
2288       APInt ShiftedC = C.lshr(*ShiftAmt);
2289       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2290     }
2291     if (Pred == ICmpInst::ICMP_ULT) {
2292       // ULE is the same as above, but ULE is canonicalized to ULT, so convert:
2293       // (X << S) <=u C is equiv to X <=u (C >> S) for all C
2294       // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
2295       // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
2296       assert(C.ugt(0) && "ult 0 should have been eliminated");
2297       APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1;
2298       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2299     }
2300   }
2301 
2302   if (Cmp.isEquality() && Shl->hasOneUse()) {
2303     // Strength-reduce the shift into an 'and'.
2304     Constant *Mask = ConstantInt::get(
2305         ShType,
2306         APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
2307     Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
2308     Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt));
2309     return new ICmpInst(Pred, And, LShrC);
2310   }
2311 
2312   // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
2313   bool TrueIfSigned = false;
2314   if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) {
2315     // (X << 31) <s 0  --> (X & 1) != 0
2316     Constant *Mask = ConstantInt::get(
2317         ShType,
2318         APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
2319     Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
2320     return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
2321                         And, Constant::getNullValue(ShType));
2322   }
2323 
2324   // Simplify 'shl' inequality test into 'and' equality test.
2325   if (Cmp.isUnsigned() && Shl->hasOneUse()) {
2326     // (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0
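    // For example, with i8, C1 == 7, C2 == 2: (X << 2) u<= 7 holds iff bits
    // 1..5 of X are clear, i.e. (X & 0x3E) == 0, where 0x3E is ~7 l>> 2.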
2327     if ((C + 1).isPowerOf2() &&
2328         (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) {
2329       Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue()));
2330       return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ
2331                                                      : ICmpInst::ICMP_NE,
2332                           And, Constant::getNullValue(ShType));
2333     }
2334     // (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0
2335     if (C.isPowerOf2() &&
2336         (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
2337       Value *And =
2338           Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue()));
2339       return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ
2340                                                      : ICmpInst::ICMP_NE,
2341                           And, Constant::getNullValue(ShType));
2342     }
2343   }
2344 
2345   // Transform (icmp pred iM (shl iM %v, N), C)
2346   // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N)))
2347   // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N is legal.
2348   // This enables us to get rid of the shift in favor of a trunc that may be
2349   // free on the target. It has the additional benefit of comparing to a
2350   // smaller constant that may be more target-friendly.
2351   unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
2352   if (Shl->hasOneUse() && Amt != 0 && C.countr_zero() >= Amt &&
2353       DL.isLegalInteger(TypeBits - Amt)) {
2354     Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
2355     if (auto *ShVTy = dyn_cast<VectorType>(ShType))
2356       TruncTy = VectorType::get(TruncTy, ShVTy->getElementCount());
2357     Constant *NewC =
2358         ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
2359     return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
2360   }
2361 
2362   return nullptr;
2363 }
2364 
2365 /// Fold icmp ({al}shr X, Y), C.
2366 Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
2367                                                    BinaryOperator *Shr,
2368                                                    const APInt &C) {
2369   // An exact shr only shifts out zero bits, so:
2370   // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
2371   Value *X = Shr->getOperand(0);
2372   CmpInst::Predicate Pred = Cmp.getPredicate();
2373   if (Cmp.isEquality() && Shr->isExact() && C.isZero())
2374     return new ICmpInst(Pred, X, Cmp.getOperand(1));
2375 
2376   bool IsAShr = Shr->getOpcode() == Instruction::AShr;
2377   const APInt *ShiftValC;
2378   if (match(X, m_APInt(ShiftValC))) {
2379     if (Cmp.isEquality())
2380       return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC);
2381 
2382     // (ShiftValC >> Y) >s -1 --> Y != 0 with ShiftValC < 0
2383     // (ShiftValC >> Y) <s  0 --> Y == 0 with ShiftValC < 0
2384     bool TrueIfSigned;
2385     if (!IsAShr && ShiftValC->isNegative() &&
2386         isSignBitCheck(Pred, C, TrueIfSigned))
2387       return new ICmpInst(TrueIfSigned ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE,
2388                           Shr->getOperand(1),
2389                           ConstantInt::getNullValue(X->getType()));
2390 
2391     // If the shifted constant is a power-of-2, test the shift amount directly:
2392     // (ShiftValC >> Y) >u C --> Y <u (LZ(C) - LZ(ShiftValC))
2393     // (ShiftValC >> Y) <u C --> Y >=u (LZ(C-1) - LZ(ShiftValC))
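    // For example, with i8 and ShiftValC == 16: (16 >> Y) u> 3 holds iff
    // Y u< 3, since LZ(3) - LZ(16) == 6 - 3 == 3.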
2394     if (!IsAShr && ShiftValC->isPowerOf2() &&
2395         (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) {
2396       bool IsUGT = Pred == CmpInst::ICMP_UGT;
2397       assert(ShiftValC->uge(C) && "Expected simplify of compare");
2398       assert((IsUGT || !C.isZero()) && "Expected X u< 0 to simplify");
2399 
2400       unsigned CmpLZ = IsUGT ? C.countl_zero() : (C - 1).countl_zero();
2401       unsigned ShiftLZ = ShiftValC->countl_zero();
2402       Constant *NewC = ConstantInt::get(Shr->getType(), CmpLZ - ShiftLZ);
2403       auto NewPred = IsUGT ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
2404       return new ICmpInst(NewPred, Shr->getOperand(1), NewC);
2405     }
2406   }
2407 
2408   const APInt *ShiftAmtC;
2409   if (!match(Shr->getOperand(1), m_APInt(ShiftAmtC)))
2410     return nullptr;
2411 
2412   // Check that the shift amount is in range. If not, don't perform undefined
2413   // shifts. When the shift is visited it will be simplified.
2414   unsigned TypeBits = C.getBitWidth();
2415   unsigned ShAmtVal = ShiftAmtC->getLimitedValue(TypeBits);
2416   if (ShAmtVal >= TypeBits || ShAmtVal == 0)
2417     return nullptr;
2418 
2419   bool IsExact = Shr->isExact();
2420   Type *ShrTy = Shr->getType();
2421   // TODO: If we could guarantee that InstSimplify would handle all of the
2422   // constant-value-based preconditions in the folds below, then we could assert
2423   // those conditions rather than checking them. This is difficult because of
2424   // undef/poison (PR34838).
2425   if (IsAShr && Shr->hasOneUse()) {
2426     if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) {
2427       // When ShAmtC can be shifted losslessly:
2428       // icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC)
2429       // icmp slt/ult (ashr X, ShAmtC), C --> icmp slt/ult X, (C << ShAmtC)
2430       APInt ShiftedC = C.shl(ShAmtVal);
2431       if (ShiftedC.ashr(ShAmtVal) == C)
2432         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2433     }
2434     if (Pred == CmpInst::ICMP_SGT) {
2435       // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
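      // For example, with ShAmtC == 2: (ashr X, 2) s> 3 iff X s> 15, since
      // ((3 + 1) << 2) - 1 == 15.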
2436       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2437       if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
2438           (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
2439         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2440     }
2441     if (Pred == CmpInst::ICMP_UGT) {
2442       // icmp ugt (ashr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
2443       // '(C + 1) << ShAmtC' can overflow as a signed number, so the 2nd
2444       // clause accounts for that pattern.
2445       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2446       if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1) ||
2447           (C + 1).shl(ShAmtVal).isMinSignedValue())
2448         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2449     }
2450 
2451     // If the compare constant has significant bits above the lowest sign-bit,
2452     // then convert an unsigned cmp to a test of the sign-bit:
2453     // (ashr X, ShiftC) u> C --> X s< 0
2454     // (ashr X, ShiftC) u< C --> X s> -1
2455     if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) {
2456       if (Pred == CmpInst::ICMP_UGT) {
2457         return new ICmpInst(CmpInst::ICMP_SLT, X,
2458                             ConstantInt::getNullValue(ShrTy));
2459       }
2460       if (Pred == CmpInst::ICMP_ULT) {
2461         return new ICmpInst(CmpInst::ICMP_SGT, X,
2462                             ConstantInt::getAllOnesValue(ShrTy));
2463       }
2464     }
2465   } else if (!IsAShr) {
2466     if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
2467       // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
2468       // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
2469       APInt ShiftedC = C.shl(ShAmtVal);
2470       if (ShiftedC.lshr(ShAmtVal) == C)
2471         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2472     }
2473     if (Pred == CmpInst::ICMP_UGT) {
2474       // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
2475       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2476       if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
2477         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2478     }
2479   }
2480 
2481   if (!Cmp.isEquality())
2482     return nullptr;
2483 
2484   // Handle equality comparisons of shift-by-constant.
2485 
2486   // If the comparison constant changes with the shift, the comparison cannot
2487   // succeed (bits of the comparison constant cannot match the shifted value).
2488   // This should be known by InstSimplify and already be folded to true/false.
2489   assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||
2490           (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&
2491          "Expected icmp+shr simplify did not occur.");
2492 
2493   // If the bits shifted out are known zero, compare the unshifted value:
2494   //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
2495   if (Shr->isExact())
2496     return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
2497 
2498   if (C.isZero()) {
2499     // == 0 is u< 1.
2500     if (Pred == CmpInst::ICMP_EQ)
2501       return new ICmpInst(CmpInst::ICMP_ULT, X,
2502                           ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
2503     else
2504       return new ICmpInst(CmpInst::ICMP_UGT, X,
2505                           ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
2506   }
2507 
2508   if (Shr->hasOneUse()) {
2509     // Canonicalize the shift into an 'and':
2510     // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
2511     APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
2512     Constant *Mask = ConstantInt::get(ShrTy, Val);
2513     Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
2514     return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal));
2515   }
2516 
2517   return nullptr;
2518 }
2519 
2520 Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
2521                                                     BinaryOperator *SRem,
2522                                                     const APInt &C) {
2523   // Match an 'is positive' or 'is negative' comparison of remainder by a
2524   // constant power-of-2 value:
2525   // (X % pow2C) sgt/slt 0
2526   const ICmpInst::Predicate Pred = Cmp.getPredicate();
2527   if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT &&
2528       Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2529     return nullptr;
2530 
2531   // TODO: The one-use check is standard because we do not typically want to
2532   //       create longer instruction sequences, but this might be a special case
2533   //       because srem is not good for analysis or codegen.
2534   if (!SRem->hasOneUse())
2535     return nullptr;
2536 
2537   const APInt *DivisorC;
2538   if (!match(SRem->getOperand(1), m_Power2(DivisorC)))
2539     return nullptr;
2540 
2541   // For cmp_sgt/cmp_slt, only a zero-valued C is handled.
2542   // For cmp_eq/cmp_ne, only a strictly positive C is handled.
2543   if (((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT) &&
2544        !C.isZero()) ||
2545       ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2546        !C.isStrictlyPositive()))
2547     return nullptr;
2548 
2549   // Mask off the sign bit and the modulo bits (low-bits).
2550   Type *Ty = SRem->getType();
2551   APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits());
2552   Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1));
2553   Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC);
2554 
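       // For the eq/ne case, a worked i8 example (illustrative only):
       // (X s% 8) == 3 --> (X & 0x87) == 3, because a positive remainder
       // requires a clear sign bit and matching low (modulo) bits.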
2555   if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)
2556     return new ICmpInst(Pred, And, ConstantInt::get(Ty, C));
2557 
2558   // For 'is positive?' check that the sign-bit is clear and at least 1 masked
2559   // bit is set. Example:
2560   // (i8 X % 32) s> 0 --> (X & 159) s> 0
2561   if (Pred == ICmpInst::ICMP_SGT)
2562     return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty));
2563 
2564   // For 'is negative?' check that the sign-bit is set and at least 1 masked
2565   // bit is set. Example:
2566   // (i16 X % 4) s< 0 --> (X & 32771) u> 32768
2567   return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask));
2568 }
2569 
2570 /// Fold icmp (udiv X, Y), C.
2571 Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
2572                                                     BinaryOperator *UDiv,
2573                                                     const APInt &C) {
2574   ICmpInst::Predicate Pred = Cmp.getPredicate();
2575   Value *X = UDiv->getOperand(0);
2576   Value *Y = UDiv->getOperand(1);
2577   Type *Ty = UDiv->getType();
2578 
2579   const APInt *C2;
2580   if (!match(X, m_APInt(C2)))
2581     return nullptr;
2582 
2583   assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
2584 
2585   // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
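       // e.g. (an illustrative example): (udiv 64, Y) u> 7 --> Y u<= 64/8 = 8,
       // since 64/Y >= 8 exactly when Y <= 8.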
2586   if (Pred == ICmpInst::ICMP_UGT) {
2587     assert(!C.isMaxValue() &&
2588            "icmp ugt X, UINT_MAX should have been simplified already.");
2589     return new ICmpInst(ICmpInst::ICMP_ULE, Y,
2590                         ConstantInt::get(Ty, C2->udiv(C + 1)));
2591   }
2592 
2593   // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
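       // e.g. (an illustrative example): (udiv 64, Y) u< 4 --> Y u> 64/4 = 16.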
2594   if (Pred == ICmpInst::ICMP_ULT) {
2595     assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
2596     return new ICmpInst(ICmpInst::ICMP_UGT, Y,
2597                         ConstantInt::get(Ty, C2->udiv(C)));
2598   }
2599 
2600   return nullptr;
2601 }
2602 
2603 /// Fold icmp ({su}div X, Y), C.
2604 Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
2605                                                    BinaryOperator *Div,
2606                                                    const APInt &C) {
2607   ICmpInst::Predicate Pred = Cmp.getPredicate();
2608   Value *X = Div->getOperand(0);
2609   Value *Y = Div->getOperand(1);
2610   Type *Ty = Div->getType();
2611   bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
2612 
2613   // If this is unsigned division and the compare constant is bigger than
2614   // UMAX/2 (sign bit set), there's only one pair of values that satisfies an
2615   // equality check, so eliminate the division:
2616   // (X u/ Y) == C --> (X == C) && (Y == 1)
2617   // (X u/ Y) != C --> (X != C) || (Y != 1)
2618   // Similarly, if signed division and the compare constant is exactly SMIN:
2619   // (X s/ Y) == SMIN --> (X == SMIN) && (Y == 1)
2620   // (X s/ Y) != SMIN --> (X != SMIN) || (Y != 1)
2621   if (Cmp.isEquality() && Div->hasOneUse() && C.isSignBitSet() &&
2622       (!DivIsSigned || C.isMinSignedValue()))   {
2623     Value *XBig = Builder.CreateICmp(Pred, X, ConstantInt::get(Ty, C));
2624     Value *YOne = Builder.CreateICmp(Pred, Y, ConstantInt::get(Ty, 1));
2625     auto Logic = Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
2626     return BinaryOperator::Create(Logic, XBig, YOne);
2627   }
2628 
2629   // Fold: icmp pred ([us]div X, C2), C -> range test
2630   // Fold this div into the comparison, producing a range check.
2631   // Determine, based on the divide type, what the range is being
2632   // checked.  If there is an overflow on the low or high side, remember
2633   // it, otherwise compute the range [low, hi) bounding the new value.
2634   // See: InsertRangeTest above for the kinds of replacements possible.
2635   const APInt *C2;
2636   if (!match(Y, m_APInt(C2)))
2637     return nullptr;
2638 
2639   // FIXME: If the operand types don't match the type of the divide
2640   // then don't attempt this transform. The code below doesn't have the
2641   // logic to deal with a signed divide and an unsigned compare (and
2642   // vice versa). This is because (x /s C2) <s C  produces different
2643   // results than (x /s C2) <u C or (x /u C2) <s C or even
2644   // (x /u C2) <u C.  Simply casting the operands and result won't
2645   // work. :(  The if statement below tests that condition and bails
2646   // if it finds it.
2647   if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
2648     return nullptr;
2649 
2650   // The ProdOV computation fails on divide by 0 and divide by -1. Cases with
2651   // INT_MIN will also fail if the divisor is 1. Although folds of all these
2652   // division-by-constant cases should be present, we can not assert that they
2653   // have happened before we reach this icmp instruction.
2654   if (C2->isZero() || C2->isOne() || (DivIsSigned && C2->isAllOnes()))
2655     return nullptr;
2656 
2657   // Compute Prod = C * C2. We are essentially solving an equation of
2658   // form X / C2 = C. We solve for X by multiplying C2 and C.
2659   // By solving for X, we can turn this into a range check instead of computing
2660   // a divide.
2661   APInt Prod = C * *C2;
2662 
2663   // Determine if the product overflows by seeing if the product is not equal to
2664   // the divide. Make sure we do the same kind of divide as in the LHS
2665   // instruction that we're folding.
2666   bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
2667 
2668   // If the division is known to be exact, then there is no remainder from the
2669   // divide, so the covered range size is one; otherwise it is the divisor.
2670   APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
2671 
2672   // Figure out the interval that is being checked.  For example, a comparison
2673   // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
2674   // Compute this interval based on the constants involved and the signedness of
2675   // the compare/divide.  This computes a half-open interval, keeping track of
2676   // whether either value in the interval overflows.  After analysis, each
2677   // overflow variable is set to 0 if its corresponding bound variable is valid,
2678   // -1 if it overflowed off the bottom end, or +1 if it overflowed off the top end.
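       // A worked example (illustrative only): "X /u 5 == 3" gives Prod = 15,
       // so LoBound = 15 and HiBound = 20 with no overflow, and the icmp
       // becomes the range test "X - 15 u< 5" (conceptually; see
       // insertRangeTest).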
2679   int LoOverflow = 0, HiOverflow = 0;
2680   APInt LoBound, HiBound;
2681 
2682   if (!DivIsSigned) { // udiv
2683     // e.g. X/5 op 3  --> [15, 20)
2684     LoBound = Prod;
2685     HiOverflow = LoOverflow = ProdOV;
2686     if (!HiOverflow) {
2687       // If this is not an exact divide, then many values in the range collapse
2688       // to the same result value.
2689       HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
2690     }
2691   } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
2692     if (C.isZero()) {                    // (X / pos) op 0
2693       // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
2694       LoBound = -(RangeSize - 1);
2695       HiBound = RangeSize;
2696     } else if (C.isStrictlyPositive()) { // (X / pos) op pos
2697       LoBound = Prod;                    // e.g.   X/5 op 3 --> [15, 20)
2698       HiOverflow = LoOverflow = ProdOV;
2699       if (!HiOverflow)
2700         HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
2701     } else { // (X / pos) op neg
2702       // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
2703       HiBound = Prod + 1;
2704       LoOverflow = HiOverflow = ProdOV ? -1 : 0;
2705       if (!LoOverflow) {
2706         APInt DivNeg = -RangeSize;
2707         LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
2708       }
2709     }
2710   } else if (C2->isNegative()) { // Divisor is < 0.
2711     if (Div->isExact())
2712       RangeSize.negate();
2713     if (C.isZero()) { // (X / neg) op 0
2714       // e.g. X/-5 op 0  --> [-4, 5)
2715       LoBound = RangeSize + 1;
2716       HiBound = -RangeSize;
2717       if (HiBound == *C2) { // -INTMIN = INTMIN
2718         HiOverflow = 1;     // [INTMIN+1, overflow)
2719         HiBound = APInt();  // e.g. X/INTMIN = 0 --> X > INTMIN
2720       }
2721     } else if (C.isStrictlyPositive()) { // (X / neg) op pos
2722       // e.g. X/-5 op 3  --> [-19, -14)
2723       HiBound = Prod + 1;
2724       HiOverflow = LoOverflow = ProdOV ? -1 : 0;
2725       if (!LoOverflow)
2726         LoOverflow =
2727             addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1 : 0;
2728     } else {          // (X / neg) op neg
2729       LoBound = Prod; // e.g. X/-5 op -3  --> [15, 20)
2730       LoOverflow = HiOverflow = ProdOV;
2731       if (!HiOverflow)
2732         HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
2733     }
2734 
2735     // Dividing by a negative swaps the condition.  LT <-> GT
2736     Pred = ICmpInst::getSwappedPredicate(Pred);
2737   }
2738 
2739   switch (Pred) {
2740   default:
2741     llvm_unreachable("Unhandled icmp predicate!");
2742   case ICmpInst::ICMP_EQ:
2743     if (LoOverflow && HiOverflow)
2744       return replaceInstUsesWith(Cmp, Builder.getFalse());
2745     if (HiOverflow)
2746       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
2747                           X, ConstantInt::get(Ty, LoBound));
2748     if (LoOverflow)
2749       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
2750                           X, ConstantInt::get(Ty, HiBound));
2751     return replaceInstUsesWith(
2752         Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
2753   case ICmpInst::ICMP_NE:
2754     if (LoOverflow && HiOverflow)
2755       return replaceInstUsesWith(Cmp, Builder.getTrue());
2756     if (HiOverflow)
2757       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
2758                           X, ConstantInt::get(Ty, LoBound));
2759     if (LoOverflow)
2760       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
2761                           X, ConstantInt::get(Ty, HiBound));
2762     return replaceInstUsesWith(
2763         Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, false));
2764   case ICmpInst::ICMP_ULT:
2765   case ICmpInst::ICMP_SLT:
2766     if (LoOverflow == +1) // Low bound is greater than input range.
2767       return replaceInstUsesWith(Cmp, Builder.getTrue());
2768     if (LoOverflow == -1) // Low bound is less than input range.
2769       return replaceInstUsesWith(Cmp, Builder.getFalse());
2770     return new ICmpInst(Pred, X, ConstantInt::get(Ty, LoBound));
2771   case ICmpInst::ICMP_UGT:
2772   case ICmpInst::ICMP_SGT:
2773     if (HiOverflow == +1) // High bound greater than input range.
2774       return replaceInstUsesWith(Cmp, Builder.getFalse());
2775     if (HiOverflow == -1) // High bound less than input range.
2776       return replaceInstUsesWith(Cmp, Builder.getTrue());
2777     if (Pred == ICmpInst::ICMP_UGT)
2778       return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, HiBound));
2779     return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, HiBound));
2780   }
2781 
2782   return nullptr;
2783 }
2784 
2785 /// Fold icmp (sub X, Y), C.
2786 Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
2787                                                    BinaryOperator *Sub,
2788                                                    const APInt &C) {
2789   Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
2790   ICmpInst::Predicate Pred = Cmp.getPredicate();
2791   Type *Ty = Sub->getType();
2792 
2793   // (SubC - Y) == C --> Y == (SubC - C)
2794   // (SubC - Y) != C --> Y != (SubC - C)
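       // e.g. (an illustrative example): (42 - Y) == 10 --> Y == 32.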
2795   Constant *SubC;
2796   if (Cmp.isEquality() && match(X, m_ImmConstant(SubC))) {
2797     return new ICmpInst(Pred, Y,
2798                         ConstantExpr::getSub(SubC, ConstantInt::get(Ty, C)));
2799   }
2800 
2801   // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
2802   const APInt *C2;
2803   APInt SubResult;
2804   ICmpInst::Predicate SwappedPred = Cmp.getSwappedPredicate();
2805   bool HasNSW = Sub->hasNoSignedWrap();
2806   bool HasNUW = Sub->hasNoUnsignedWrap();
2807   if (match(X, m_APInt(C2)) &&
2808       ((Cmp.isUnsigned() && HasNUW) || (Cmp.isSigned() && HasNSW)) &&
2809       !subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
2810     return new ICmpInst(SwappedPred, Y, ConstantInt::get(Ty, SubResult));
2811 
2812   // X - Y == 0 --> X == Y.
2813   // X - Y != 0 --> X != Y.
2814   // TODO: We allow this with multiple uses as long as the other uses are not
2815   //       in phis. The phi use check is guarding against a codegen regression
2816   //       for a loop test. If the backend could undo this (and possibly
2817   //       subsequent transforms), we would not need this hack.
2818   if (Cmp.isEquality() && C.isZero() &&
2819       none_of((Sub->users()), [](const User *U) { return isa<PHINode>(U); }))
2820     return new ICmpInst(Pred, X, Y);
2821 
2822   // The following transforms are only worth it if the only user of the subtract
2823   // is the icmp.
2824   // TODO: This is an artificial restriction for all of the transforms below
2825   //       that only need a single replacement icmp. Can these use the phi test
2826   //       like the transform above here?
2827   if (!Sub->hasOneUse())
2828     return nullptr;
2829 
2830   if (Sub->hasNoSignedWrap()) {
2831     // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
2832     if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
2833       return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
2834 
2835     // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
2836     if (Pred == ICmpInst::ICMP_SGT && C.isZero())
2837       return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
2838 
2839     // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
2840     if (Pred == ICmpInst::ICMP_SLT && C.isZero())
2841       return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
2842 
2843     // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
2844     if (Pred == ICmpInst::ICMP_SLT && C.isOne())
2845       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
2846   }
2847 
2848   if (!match(X, m_APInt(C2)))
2849     return nullptr;
2850 
2851   // C2 - Y <u C -> (Y | (C - 1)) == C2
2852   //   iff (C2 & (C - 1)) == C - 1 and C is a power of 2
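       // e.g. (an illustrative i8 example): (7 - Y) u< 4 --> (Y | 3) == 7,
       // since 7 - Y u< 4 holds exactly for Y in [4, 7].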
2853   if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() &&
2854       (*C2 & (C - 1)) == (C - 1))
2855     return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X);
2856 
2857   // C2 - Y >u C -> (Y | C) != C2
2858   //   iff C2 & C == C and C + 1 is a power of 2
2859   if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
2860     return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
2861 
2862   // We have handled the special cases that fold to a simpler compare.
2863   // Canonicalize any remaining sub to add as:
2864   // (C2 - Y) > C --> (Y + ~C2) < ~C
2865   Value *Add = Builder.CreateAdd(Y, ConstantInt::get(Ty, ~(*C2)), "notsub",
2866                                  HasNUW, HasNSW);
2867   return new ICmpInst(SwappedPred, Add, ConstantInt::get(Ty, ~C));
2868 }
2869 
2870 static Value *createLogicFromTable(const std::bitset<4> &Table, Value *Op0,
2871                                    Value *Op1, IRBuilderBase &Builder,
2872                                    bool HasOneUse) {
2873   auto FoldConstant = [&](bool Val) {
2874     Constant *Res = Val ? Builder.getTrue() : Builder.getFalse();
2875     if (Op0->getType()->isVectorTy())
2876       Res = ConstantVector::getSplat(
2877           cast<VectorType>(Op0->getType())->getElementCount(), Res);
2878     return Res;
2879   };
2880 
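       // The table is indexed by the inputs (Op0, Op1) as i = Op0*2 + Op1; for
       // instance, Table == 0b0110 (case 6) is the truth table of xor.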
2881   switch (Table.to_ulong()) {
2882   case 0: // 0 0 0 0
2883     return FoldConstant(false);
2884   case 1: // 0 0 0 1
2885     return HasOneUse ? Builder.CreateNot(Builder.CreateOr(Op0, Op1)) : nullptr;
2886   case 2: // 0 0 1 0
2887     return HasOneUse ? Builder.CreateAnd(Builder.CreateNot(Op0), Op1) : nullptr;
2888   case 3: // 0 0 1 1
2889     return Builder.CreateNot(Op0);
2890   case 4: // 0 1 0 0
2891     return HasOneUse ? Builder.CreateAnd(Op0, Builder.CreateNot(Op1)) : nullptr;
2892   case 5: // 0 1 0 1
2893     return Builder.CreateNot(Op1);
2894   case 6: // 0 1 1 0
2895     return Builder.CreateXor(Op0, Op1);
2896   case 7: // 0 1 1 1
2897     return HasOneUse ? Builder.CreateNot(Builder.CreateAnd(Op0, Op1)) : nullptr;
2898   case 8: // 1 0 0 0
2899     return Builder.CreateAnd(Op0, Op1);
2900   case 9: // 1 0 0 1
2901     return HasOneUse ? Builder.CreateNot(Builder.CreateXor(Op0, Op1)) : nullptr;
2902   case 10: // 1 0 1 0
2903     return Op1;
2904   case 11: // 1 0 1 1
2905     return HasOneUse ? Builder.CreateOr(Builder.CreateNot(Op0), Op1) : nullptr;
2906   case 12: // 1 1 0 0
2907     return Op0;
2908   case 13: // 1 1 0 1
2909     return HasOneUse ? Builder.CreateOr(Op0, Builder.CreateNot(Op1)) : nullptr;
2910   case 14: // 1 1 1 0
2911     return Builder.CreateOr(Op0, Op1);
2912   case 15: // 1 1 1 1
2913     return FoldConstant(true);
2914   default:
2915     llvm_unreachable("Invalid Operation");
2916   }
2917   return nullptr;
2918 }
2919 
2920 /// Fold icmp (add X, Y), C.
2921 Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
2922                                                    BinaryOperator *Add,
2923                                                    const APInt &C) {
2924   Value *Y = Add->getOperand(1);
2925   Value *X = Add->getOperand(0);
2926 
2927   Value *Op0, *Op1;
2928   Instruction *Ext0, *Ext1;
2929   const CmpInst::Predicate Pred = Cmp.getPredicate();
2930   if (match(Add,
2931             m_Add(m_CombineAnd(m_Instruction(Ext0), m_ZExtOrSExt(m_Value(Op0))),
2932                   m_CombineAnd(m_Instruction(Ext1),
2933                                m_ZExtOrSExt(m_Value(Op1))))) &&
2934       Op0->getType()->isIntOrIntVectorTy(1) &&
2935       Op1->getType()->isIntOrIntVectorTy(1)) {
2936     unsigned BW = C.getBitWidth();
2937     std::bitset<4> Table;
2938     auto ComputeTable = [&](bool Op0Val, bool Op1Val) {
2939       int Res = 0;
2940       if (Op0Val)
2941         Res += isa<ZExtInst>(Ext0) ? 1 : -1;
2942       if (Op1Val)
2943         Res += isa<ZExtInst>(Ext1) ? 1 : -1;
2944       return ICmpInst::compare(APInt(BW, Res, true), C, Pred);
2945     };
2946 
2947     Table[0] = ComputeTable(false, false);
2948     Table[1] = ComputeTable(false, true);
2949     Table[2] = ComputeTable(true, false);
2950     Table[3] = ComputeTable(true, true);
2951     if (auto *Cond =
2952             createLogicFromTable(Table, Op0, Op1, Builder, Add->hasOneUse()))
2953       return replaceInstUsesWith(Cmp, Cond);
2954   }
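       // A worked example of the table fold (illustrative): for
       // (zext i1 A) + (sext i1 B) == 1, only (A, B) = (1, 0) sums to 1, so
       // Table == 0b0100 and the icmp reduces to and(A, not(B)).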
2955   const APInt *C2;
2956   if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
2957     return nullptr;
2958 
2959   // Fold icmp pred (add X, C2), C.
2960   Type *Ty = Add->getType();
2961 
2962   // If the add does not wrap, we can always adjust the compare by subtracting
2963   // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
2964   // are canonicalized to SGT/SLT/UGT/ULT.
2965   if ((Add->hasNoSignedWrap() &&
2966        (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
2967       (Add->hasNoUnsignedWrap() &&
2968        (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
2969     bool Overflow;
2970     APInt NewC =
2971         Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
2972     // If there is overflow, the result must be true or false.
2973     // TODO: Can we assert there is no overflow because InstSimplify always
2974     // handles those cases?
2975     if (!Overflow)
2976       // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2)
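           // e.g. (an illustrative example): (add nsw X, 5) s> 20 --> X s> 15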
2977       return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC));
2978   }
2979 
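       // Produce an equivalent compare on X by shifting the exact icmp region.
       // e.g. (an illustrative i8 example): (add i8 X, 5) u> 4 --> X u< 251,
       // since the region [5, 0) minus 5 is [0, 251).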
2980   auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2);
2981   const APInt &Upper = CR.getUpper();
2982   const APInt &Lower = CR.getLower();
2983   if (Cmp.isSigned()) {
2984     if (Lower.isSignMask())
2985       return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
2986     if (Upper.isSignMask())
2987       return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
2988   } else {
2989     if (Lower.isMinValue())
2990       return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper));
2991     if (Upper.isMinValue())
2992       return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
2993   }
2994 
2995   // This set of folds is intentionally placed after folds that use no-wrapping
2996   // flags because those folds are likely better for later analysis/codegen.
2997   const APInt SMax = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());
2998   const APInt SMin = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
2999 
3000   // Fold compare with offset to opposite sign compare if it eliminates offset:
3001   // (X + C2) >u C --> X <s -C2 (if C == C2 + SMAX)
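       // e.g. (an illustrative i8 example): (X + 1) u> 128 --> X s< -1,
       // since 128 == 1 + 127 == C2 + SMAX.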
3002   if (Pred == CmpInst::ICMP_UGT && C == *C2 + SMax)
3003     return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, -(*C2)));
3004 
3005   // (X + C2) <u C --> X >s ~C2 (if C == C2 + SMIN)
3006   if (Pred == CmpInst::ICMP_ULT && C == *C2 + SMin)
3007     return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantInt::get(Ty, ~(*C2)));
3008 
3009   // (X + C2) >s C --> X <u (SMAX - C) (if C == C2 - 1)
3010   if (Pred == CmpInst::ICMP_SGT && C == *C2 - 1)
3011     return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, SMax - C));
3012 
3013   // (X + C2) <s C --> X >u (C ^ SMAX) (if C == C2)
3014   if (Pred == CmpInst::ICMP_SLT && C == *C2)
3015     return new ICmpInst(ICmpInst::ICMP_UGT, X, ConstantInt::get(Ty, C ^ SMax));
3016 
3017   // (X + -1) <u C --> X <=u C (if X is known non-zero)
3018   if (Pred == CmpInst::ICMP_ULT && C2->isAllOnes()) {
3019     const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
3020     if (llvm::isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
3021       return new ICmpInst(ICmpInst::ICMP_ULE, X, ConstantInt::get(Ty, C));
3022   }
3023 
3024   if (!Add->hasOneUse())
3025     return nullptr;
3026 
3027   // X+C <u C2 -> (X & -C2) == -C
3028   //   iff C & (C2-1) == 0
3029   //       C2 is a power of 2
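       // e.g. (an illustrative i8 example): (X + 32) u< 16 --> (X & -16) == -32,
       // i.e. X in [224, 239].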
3030   if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0)
3031     return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C),
3032                         ConstantExpr::getNeg(cast<Constant>(Y)));
3033 
3034   // X+C >u C2 -> (X & ~C2) != -C
3035   //   iff C & C2 == 0
3036   //       C2+1 is a power of 2
3037   if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0)
3038     return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
3039                         ConstantExpr::getNeg(cast<Constant>(Y)));
3040 
3041   // The range test idiom can use either ult or ugt. Arbitrarily canonicalize
3042   // to the ult form.
3043   // X+C2 >u C -> X+(C2-C-1) <u ~C
3044   if (Pred == ICmpInst::ICMP_UGT)
3045     return new ICmpInst(ICmpInst::ICMP_ULT,
3046                         Builder.CreateAdd(X, ConstantInt::get(Ty, *C2 - C - 1)),
3047                         ConstantInt::get(Ty, ~C));
3048 
3049   return nullptr;
3050 }
3051 
3052 bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
3053                                                Value *&RHS, ConstantInt *&Less,
3054                                                ConstantInt *&Equal,
3055                                                ConstantInt *&Greater) {
3056   // TODO: Generalize this to work with other comparison idioms or ensure
3057   // they get canonicalized into this form.
3058 
3059   // select i1 (a == b),
3060   //        i32 Equal,
3061   //        i32 (select i1 (a < b), i32 Less, i32 Greater)
3062   // where Equal, Less and Greater are placeholders for any three constants.
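       // A hypothetical IR instance of the pattern (names are illustrative):
       //   %eq    = icmp eq i32 %a, %b
       //   %lt    = icmp slt i32 %a, %b
       //   %inner = select i1 %lt, i32 -1, i32 1
       //   %res   = select i1 %eq, i32 0, i32 %inner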
3063   ICmpInst::Predicate PredA;
3064   if (!match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) ||
3065       !ICmpInst::isEquality(PredA))
3066     return false;
3067   Value *EqualVal = SI->getTrueValue();
3068   Value *UnequalVal = SI->getFalseValue();
3069   // We can still get a non-canonical predicate here, so canonicalize.
3070   if (PredA == ICmpInst::ICMP_NE)
3071     std::swap(EqualVal, UnequalVal);
3072   if (!match(EqualVal, m_ConstantInt(Equal)))
3073     return false;
3074   ICmpInst::Predicate PredB;
3075   Value *LHS2, *RHS2;
3076   if (!match(UnequalVal, m_Select(m_ICmp(PredB, m_Value(LHS2), m_Value(RHS2)),
3077                                   m_ConstantInt(Less), m_ConstantInt(Greater))))
3078     return false;
3079   // We can get a predicate mismatch here, so canonicalize if possible:
3080   // First, ensure that the 'LHS' operands match.
3081   if (LHS2 != LHS) {
3082     // x sgt y <--> y slt x
3083     std::swap(LHS2, RHS2);
3084     PredB = ICmpInst::getSwappedPredicate(PredB);
3085   }
3086   if (LHS2 != LHS)
3087     return false;
3088   // We also need to canonicalize 'RHS'.
3089   if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) {
3090     // x sgt C-1  <-->  x sge C  <-->  not(x slt C)
3091     auto FlippedStrictness =
3092         InstCombiner::getFlippedStrictnessPredicateAndConstant(
3093             PredB, cast<Constant>(RHS2));
3094     if (!FlippedStrictness)
3095       return false;
3096     assert(FlippedStrictness->first == ICmpInst::ICMP_SGE &&
3097            "basic correctness failure");
3098     RHS2 = FlippedStrictness->second;
3099     // And kind-of perform the result swap.
3100     std::swap(Less, Greater);
3101     PredB = ICmpInst::ICMP_SLT;
3102   }
3103   return PredB == ICmpInst::ICMP_SLT && RHS == RHS2;
3104 }
3105 
3106 Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp,
3107                                                       SelectInst *Select,
3108                                                       ConstantInt *C) {
3109 
3110   assert(C && "Cmp RHS should be a constant int!");
3111   // If we're testing a constant value against the result of a three-way
3112   // comparison, the result can be expressed directly in terms of the
3113   // original values being compared.  Note: We could possibly be more
3114   // aggressive here and remove the hasOneUse test. The original select is
3115   // really likely to simplify or sink when we remove a test of the result.
3116   Value *OrigLHS, *OrigRHS;
3117   ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan;
3118   if (Cmp.hasOneUse() &&
3119       matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal,
3120                               C3GreaterThan)) {
3121     assert(C1LessThan && C2Equal && C3GreaterThan);
3122 
3123     bool TrueWhenLessThan =
3124         ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C)
3125             ->isAllOnesValue();
3126     bool TrueWhenEqual =
3127         ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C)
3128             ->isAllOnesValue();
3129     bool TrueWhenGreaterThan =
3130         ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C)
3131             ->isAllOnesValue();
3132 
3133     // This generates the new instruction that will replace the original Cmp
3134     // Instruction. Instead of enumerating the various combinations when
3135     // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
3136     // false, we rely on chaining of ORs and future passes of InstCombine to
3137     // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).
3138 
3139     // When none of the three constants satisfy the predicate for the RHS (C),
3140     // the entire original Cmp can be simplified to false.
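         // e.g. (an illustrative example): icmp sgt %res, -1 with constants
         // (Less, Equal, Greater) = (-1, 0, 1) is true when equal or greater,
         // so Cond becomes (a == b) || (a s> b).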
3141     Value *Cond = Builder.getFalse();
3142     if (TrueWhenLessThan)
3143       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT,
3144                                                        OrigLHS, OrigRHS));
3145     if (TrueWhenEqual)
3146       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ,
3147                                                        OrigLHS, OrigRHS));
3148     if (TrueWhenGreaterThan)
3149       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT,
3150                                                        OrigLHS, OrigRHS));
3151 
3152     return replaceInstUsesWith(Cmp, Cond);
3153   }
3154   return nullptr;
3155 }
3156 
3157 Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
3158   auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0));
3159   if (!Bitcast)
3160     return nullptr;
3161 
3162   ICmpInst::Predicate Pred = Cmp.getPredicate();
3163   Value *Op1 = Cmp.getOperand(1);
3164   Value *BCSrcOp = Bitcast->getOperand(0);
3165   Type *SrcType = Bitcast->getSrcTy();
3166   Type *DstType = Bitcast->getType();
3167 
3168   // Make sure the bitcast doesn't change between scalar and vector and
3169   // doesn't change the number of vector elements.
3170   if (SrcType->isVectorTy() == DstType->isVectorTy() &&
3171       SrcType->getScalarSizeInBits() == DstType->getScalarSizeInBits()) {
3172     // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
3173     Value *X;
3174     if (match(BCSrcOp, m_SIToFP(m_Value(X)))) {
3175       // icmp  eq (bitcast (sitofp X)), 0 --> icmp  eq X, 0
3176       // icmp  ne (bitcast (sitofp X)), 0 --> icmp  ne X, 0
3177       // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
3178       // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
3179       if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
3180            Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
3181           match(Op1, m_Zero()))
3182         return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
3183 
3184       // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
3185       if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
3186         return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
3187 
3188       // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
3189       if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
3190         return new ICmpInst(Pred, X,
3191                             ConstantInt::getAllOnesValue(X->getType()));
3192     }
3193 
3194     // Zero-equality checks are preserved through unsigned floating-point casts:
3195     // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
3196     // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
3197     if (match(BCSrcOp, m_UIToFP(m_Value(X))))
3198       if (Cmp.isEquality() && match(Op1, m_Zero()))
3199         return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
3200 
3201     // If this is a sign-bit test of a bitcast of a casted FP value, eliminate
3202     // the FP extend/truncate because that cast does not change the sign-bit.
3203     // This is true for all standard IEEE-754 types and the X86 80-bit type.
3204     // The sign-bit is always the most significant bit in those types.
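         // e.g. (an illustrative example):
         //   (bitcast (fpext float %x to double) to i64) s< 0
         //     --> (bitcast float %x to i32) s< 0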
3205     const APInt *C;
3206     bool TrueIfSigned;
3207     if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() &&
3208         isSignBitCheck(Pred, *C, TrueIfSigned)) {
3209       if (match(BCSrcOp, m_FPExt(m_Value(X))) ||
3210           match(BCSrcOp, m_FPTrunc(m_Value(X)))) {
3211         // (bitcast (fpext/fptrunc X) to iX) < 0 --> (bitcast X to iY) < 0
3212         // (bitcast (fpext/fptrunc X) to iX) > -1 --> (bitcast X to iY) > -1
3213         Type *XType = X->getType();
3214 
3215         // We can't currently handle PowerPC-style floating-point operations here.
3216         if (!(XType->isPPC_FP128Ty() || SrcType->isPPC_FP128Ty())) {
3217           Type *NewType = Builder.getIntNTy(XType->getScalarSizeInBits());
3218           if (auto *XVTy = dyn_cast<VectorType>(XType))
3219             NewType = VectorType::get(NewType, XVTy->getElementCount());
3220           Value *NewBitcast = Builder.CreateBitCast(X, NewType);
3221           if (TrueIfSigned)
3222             return new ICmpInst(ICmpInst::ICMP_SLT, NewBitcast,
3223                                 ConstantInt::getNullValue(NewType));
3224           else
3225             return new ICmpInst(ICmpInst::ICMP_SGT, NewBitcast,
3226                                 ConstantInt::getAllOnesValue(NewType));
3227         }
3228       }
3229     }
3230   }
3231 
3232   const APInt *C;
3233   if (!match(Cmp.getOperand(1), m_APInt(C)) || !DstType->isIntegerTy() ||
3234       !SrcType->isIntOrIntVectorTy())
3235     return nullptr;
3236 
3237   // If this is checking if all elements of a vector compare are set or not,
3238   // invert the casted vector equality compare and test if all compare
3239   // elements are clear or not. Compare against zero is generally easier for
3240   // analysis and codegen.
3241   // icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0
3242   // Example: are all elements equal? --> are zero elements not equal?
3243   // TODO: Try harder to reduce compare of 2 freely invertible operands?
3244   if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse()) {
3245     if (Value *NotBCSrcOp =
3246             getFreelyInverted(BCSrcOp, BCSrcOp->hasOneUse(), &Builder)) {
3247       Value *Cast = Builder.CreateBitCast(NotBCSrcOp, DstType);
3248       return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType));
3249     }
3250   }
3251 
3252   // If this is checking if all elements of an extended vector are clear or not,
3253   // compare in a narrow type to eliminate the extend:
3254   // icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0
3255   Value *X;
3256   if (Cmp.isEquality() && C->isZero() && Bitcast->hasOneUse() &&
3257       match(BCSrcOp, m_ZExtOrSExt(m_Value(X)))) {
3258     if (auto *VecTy = dyn_cast<FixedVectorType>(X->getType())) {
3259       Type *NewType = Builder.getIntNTy(VecTy->getPrimitiveSizeInBits());
3260       Value *NewCast = Builder.CreateBitCast(X, NewType);
3261       return new ICmpInst(Pred, NewCast, ConstantInt::getNullValue(NewType));
3262     }
3263   }
3264 
3265   // Folding: icmp <pred> iN X, C
3266   //  where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
3267   //    and C is a splat of a K-bit pattern
3268   //    and SC is a constant vector = <C', C', C', ..., C'>
3269   // Into:
3270   //   %E = extractelement <M x iK> %vec, i32 C'
3271   //   icmp <pred> iK %E, trunc(C)
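       // For instance (an illustrative example): with %vec of type <4 x i8>,
       // SC = <2, 2, 2, 2> and C = 0xAAAAAAAA, this produces
       // %E = extractelement <4 x i8> %vec, i32 2 and icmp <pred> i8 %E, 0xAA.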
3272   Value *Vec;
3273   ArrayRef<int> Mask;
3274   if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) {
3275     // Check whether every element of Mask is the same constant
3276     if (all_equal(Mask)) {
3277       auto *VecTy = cast<VectorType>(SrcType);
3278       auto *EltTy = cast<IntegerType>(VecTy->getElementType());
3279       if (C->isSplat(EltTy->getBitWidth())) {
3280         // Fold the icmp based on the value of C
3281         // If C is M copies of an iK sized bit pattern,
3282         // then:
3283         //   =>  %E = extractelement <M x iK> %vec, i32 Elem
3284         //       icmp <pred> iK %E, <pattern>
3285         Value *Elem = Builder.getInt32(Mask[0]);
3286         Value *Extract = Builder.CreateExtractElement(Vec, Elem);
3287         Value *NewC = ConstantInt::get(EltTy, C->trunc(EltTy->getBitWidth()));
3288         return new ICmpInst(Pred, Extract, NewC);
3289       }
3290     }
3291   }
3292   return nullptr;
3293 }
3294 
3295 /// Try to fold integer comparisons with a constant operand: icmp Pred X, C
3296 /// where X is some kind of instruction.
3297 Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) {
3298   const APInt *C;
3299 
3300   if (match(Cmp.getOperand(1), m_APInt(C))) {
3301     if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0)))
3302       if (Instruction *I = foldICmpBinOpWithConstant(Cmp, BO, *C))
3303         return I;
3304 
3305     if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0)))
3306       // For now, we only support constant integers while folding the
3307       // ICMP(SELECT) pattern. We can extend this to support vectors of integers
3308       // similar to the cases handled by binary ops above.
3309       if (auto *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
3310         if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
3311           return I;
3312 
3313     if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0)))
3314       if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
3315         return I;
3316 
3317     if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
3318       if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
3319         return I;
3320 
3321     // (extractval ([s/u]subo X, Y), 0) == 0 --> X == Y
3322     // (extractval ([s/u]subo X, Y), 0) != 0 --> X != Y
3323     // TODO: This checks one-use, but that is not strictly necessary.
3324     Value *Cmp0 = Cmp.getOperand(0);
3325     Value *X, *Y;
3326     if (C->isZero() && Cmp.isEquality() && Cmp0->hasOneUse() &&
3327         (match(Cmp0,
3328                m_ExtractValue<0>(m_Intrinsic<Intrinsic::ssub_with_overflow>(
3329                    m_Value(X), m_Value(Y)))) ||
3330          match(Cmp0,
3331                m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
3332                    m_Value(X), m_Value(Y))))))
3333       return new ICmpInst(Cmp.getPredicate(), X, Y);
3334   }
3335 
3336   if (match(Cmp.getOperand(1), m_APIntAllowUndef(C)))
3337     return foldICmpInstWithConstantAllowUndef(Cmp, *C);
3338 
3339   return nullptr;
3340 }
3341 
3342 /// Fold an icmp equality instruction with binary operator LHS and constant RHS:
3343 /// icmp eq/ne BO, C.
3344 Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
3345     ICmpInst &Cmp, BinaryOperator *BO, const APInt &C) {
3346   // TODO: Some of these folds could work with arbitrary constants, but this
3347   // function is limited to scalar and vector splat constants.
3348   if (!Cmp.isEquality())
3349     return nullptr;
3350 
3351   ICmpInst::Predicate Pred = Cmp.getPredicate();
3352   bool isICMP_NE = Pred == ICmpInst::ICMP_NE;
3353   Constant *RHS = cast<Constant>(Cmp.getOperand(1));
3354   Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
3355 
3356   switch (BO->getOpcode()) {
3357   case Instruction::SRem:
3358     // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
3359     if (C.isZero() && BO->hasOneUse()) {
3360       const APInt *BOC;
3361       if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
3362         Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
3363         return new ICmpInst(Pred, NewRem,
3364                             Constant::getNullValue(BO->getType()));
3365       }
3366     }
3367     break;
3368   case Instruction::Add: {
3369     // (A + C2) == C --> A == (C - C2)
3370     // (A + C2) != C --> A != (C - C2)
3371     // TODO: Remove the one-use limitation? See discussion in D58633.
3372     if (Constant *C2 = dyn_cast<Constant>(BOp1)) {
3373       if (BO->hasOneUse())
3374         return new ICmpInst(Pred, BOp0, ConstantExpr::getSub(RHS, C2));
3375     } else if (C.isZero()) {
3376       // Replace ((add A, B) != 0) with (A != -B) if A or B is
3377       // efficiently invertible, or if the add has just this one use.
3378       if (Value *NegVal = dyn_castNegVal(BOp1))
3379         return new ICmpInst(Pred, BOp0, NegVal);
3380       if (Value *NegVal = dyn_castNegVal(BOp0))
3381         return new ICmpInst(Pred, NegVal, BOp1);
3382       if (BO->hasOneUse()) {
3383         Value *Neg = Builder.CreateNeg(BOp1);
3384         Neg->takeName(BO);
3385         return new ICmpInst(Pred, BOp0, Neg);
3386       }
3387     }
3388     break;
3389   }
3390   case Instruction::Xor:
3391     if (BO->hasOneUse()) {
3392       if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
3393         // For the xor case, we can xor two constants together, eliminating
3394         // the explicit xor.
3395         return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
3396       } else if (C.isZero()) {
3397         // Replace ((xor A, B) != 0) with (A != B)
3398         return new ICmpInst(Pred, BOp0, BOp1);
3399       }
3400     }
3401     break;
3402   case Instruction::Or: {
3403     const APInt *BOC;
3404     if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
3405       // Comparing if all bits outside of a constant mask are set?
3406       // Replace (X | C) == -1 with (X & ~C) == ~C.
3407       // This removes the -1 constant.
3408       Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
3409       Value *And = Builder.CreateAnd(BOp0, NotBOC);
3410       return new ICmpInst(Pred, And, NotBOC);
3411     }
3412     break;
3413   }
3414   case Instruction::UDiv:
3415   case Instruction::SDiv:
3416     if (BO->isExact()) {
3417       // div exact X, Y eq/ne 0 -> X eq/ne 0
3418       // div exact X, Y eq/ne 1 -> X eq/ne Y
3419       // div exact X, Y eq/ne C ->
3420       //    if Y * C never overflows && OneUse:
3421       //      -> Y * C eq/ne X
3422       if (C.isZero())
3423         return new ICmpInst(Pred, BOp0, Constant::getNullValue(BO->getType()));
3424       else if (C.isOne())
3425         return new ICmpInst(Pred, BOp0, BOp1);
3426       else if (BO->hasOneUse()) {
3427         OverflowResult OR = computeOverflow(
3428             Instruction::Mul, BO->getOpcode() == Instruction::SDiv, BOp1,
3429             Cmp.getOperand(1), BO);
3430         if (OR == OverflowResult::NeverOverflows) {
3431           Value *YC =
3432               Builder.CreateMul(BOp1, ConstantInt::get(BO->getType(), C));
3433           return new ICmpInst(Pred, YC, BOp0);
3434         }
3435       }
3436     }
3437     if (BO->getOpcode() == Instruction::UDiv && C.isZero()) {
3438       // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
3439       auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
3440       return new ICmpInst(NewPred, BOp1, BOp0);
3441     }
3442     break;
3443   default:
3444     break;
3445   }
3446   return nullptr;
3447 }
3448 
3449 static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs,
3450                                       const APInt &CRhs,
3451                                       InstCombiner::BuilderTy &Builder,
3452                                       const SimplifyQuery &Q) {
3453   assert(CtpopLhs->getIntrinsicID() == Intrinsic::ctpop &&
3454          "Non-ctpop intrin in ctpop fold");
3455   if (!CtpopLhs->hasOneUse())
3456     return nullptr;
3457 
3458   // Power of 2 test:
3459   //    isPow2OrZero : ctpop(X) u< 2
3460   //    isPow2       : ctpop(X) == 1
3461   //    NotPow2OrZero: ctpop(X) u> 1
3462   //    NotPow2      : ctpop(X) != 1
3463   // If some bit of X is known to be set (call it Bit), these fold to:
3464   //    IsPow2 / Pow2OrZero       : (X & ~Bit) == 0
3465   //    NotPow2 / NotPow2OrZero   : (X & ~Bit) != 0
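       // e.g. (an illustrative example): if bit 3 of X is known set, then
       // ctpop(X) == 1 --> (X & ~8) == 0 and ctpop(X) != 1 --> (X & ~8) != 0.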
3466   const ICmpInst::Predicate Pred = I.getPredicate();
3467   if (((I.isEquality() || Pred == ICmpInst::ICMP_UGT) && CRhs == 1) ||
3468       (Pred == ICmpInst::ICMP_ULT && CRhs == 2)) {
3469     Value *Op = CtpopLhs->getArgOperand(0);
3470     KnownBits OpKnown = computeKnownBits(Op, Q.DL,
3471                                          /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);
3472     // No need to check for count > 1, that should be already constant folded.
3473     if (OpKnown.countMinPopulation() == 1) {
3474       Value *And = Builder.CreateAnd(
3475           Op, Constant::getIntegerValue(Op->getType(), ~(OpKnown.One)));
3476       return new ICmpInst(
3477           (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_ULT)
3478               ? ICmpInst::ICMP_EQ
3479               : ICmpInst::ICMP_NE,
3480           And, Constant::getNullValue(Op->getType()));
3481     }
3482   }
3483 
3484   return nullptr;
3485 }
3486 
3487 /// Fold an equality icmp with LLVM intrinsic and constant operand.
3488 Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
3489     ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
3490   Type *Ty = II->getType();
3491   unsigned BitWidth = C.getBitWidth();
3492   const ICmpInst::Predicate Pred = Cmp.getPredicate();
3493 
3494   switch (II->getIntrinsicID()) {
3495   case Intrinsic::abs:
3496     // abs(A) == 0  ->  A == 0
3497     // abs(A) == INT_MIN  ->  A == INT_MIN
3498     if (C.isZero() || C.isMinSignedValue())
3499       return new ICmpInst(Pred, II->getArgOperand(0), ConstantInt::get(Ty, C));
3500     break;
3501 
3502   case Intrinsic::bswap:
3503     // bswap(A) == C  ->  A == bswap(C)
3504     return new ICmpInst(Pred, II->getArgOperand(0),
3505                         ConstantInt::get(Ty, C.byteSwap()));
3506 
3507   case Intrinsic::bitreverse:
3508     // bitreverse(A) == C  ->  A == bitreverse(C)
3509     return new ICmpInst(Pred, II->getArgOperand(0),
3510                         ConstantInt::get(Ty, C.reverseBits()));
3511 
3512   case Intrinsic::ctlz:
3513   case Intrinsic::cttz: {
3514     // ctz(A) == bitwidth(A)  ->  A == 0 and likewise for !=
3515     if (C == BitWidth)
3516       return new ICmpInst(Pred, II->getArgOperand(0),
3517                           ConstantInt::getNullValue(Ty));
3518 
3519     // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
3520     // and Mask1 has the low C+1 bits (0..C) set. Similarly for ctlz, but with high bits.
3521     // Limit to one use to ensure we don't increase instruction count.
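         // e.g. (an illustrative i8 example): cttz(X) == 2 --> (X & 0x07) == 0x04,
         // and ctlz(X) == 2 --> (X & 0xE0) == 0x20.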
3522     unsigned Num = C.getLimitedValue(BitWidth);
3523     if (Num != BitWidth && II->hasOneUse()) {
3524       bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
3525       APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
3526                                : APInt::getHighBitsSet(BitWidth, Num + 1);
3527       APInt Mask2 = IsTrailing
3528         ? APInt::getOneBitSet(BitWidth, Num)
3529         : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
3530       return new ICmpInst(Pred, Builder.CreateAnd(II->getArgOperand(0), Mask1),
3531                           ConstantInt::get(Ty, Mask2));
3532     }
3533     break;
3534   }
3535 
3536   case Intrinsic::ctpop: {
3537     // popcount(A) == 0  ->  A == 0 and likewise for !=
3538     // popcount(A) == bitwidth(A)  ->  A == -1 and likewise for !=
3539     bool IsZero = C.isZero();
3540     if (IsZero || C == BitWidth)
3541       return new ICmpInst(Pred, II->getArgOperand(0),
3542                           IsZero ? Constant::getNullValue(Ty)
3543                                  : Constant::getAllOnesValue(Ty));
3544 
3545     break;
3546   }
3547 
3548   case Intrinsic::fshl:
3549   case Intrinsic::fshr:
3550     if (II->getArgOperand(0) == II->getArgOperand(1)) {
3551       const APInt *RotAmtC;
3552       // ror(X, RotAmtC) == C --> X == rol(C, RotAmtC)
3553       // rol(X, RotAmtC) == C --> X == ror(C, RotAmtC)
3554       if (match(II->getArgOperand(2), m_APInt(RotAmtC)))
3555         return new ICmpInst(Pred, II->getArgOperand(0),
3556                             II->getIntrinsicID() == Intrinsic::fshl
3557                                 ? ConstantInt::get(Ty, C.rotr(*RotAmtC))
3558                                 : ConstantInt::get(Ty, C.rotl(*RotAmtC)));
3559     }
3560     break;
3561 
3562   case Intrinsic::umax:
3563   case Intrinsic::uadd_sat: {
3564     // uadd.sat(a, b) == 0  ->  (a | b) == 0
3565     // umax(a, b) == 0  ->  (a | b) == 0
3566     if (C.isZero() && II->hasOneUse()) {
3567       Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1));
3568       return new ICmpInst(Pred, Or, Constant::getNullValue(Ty));
3569     }
3570     break;
3571   }
3572 
3573   case Intrinsic::ssub_sat:
3574     // ssub.sat(a, b) == 0 -> a == b
3575     if (C.isZero())
3576       return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1));
3577     break;
3578   case Intrinsic::usub_sat: {
3579     // usub.sat(a, b) == 0  ->  a <= b
3580     if (C.isZero()) {
3581       ICmpInst::Predicate NewPred =
3582           Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
3583       return new ICmpInst(NewPred, II->getArgOperand(0), II->getArgOperand(1));
3584     }
3585     break;
3586   }
3587   default:
3588     break;
3589   }
3590 
3591   return nullptr;
3592 }
3593 
3594 /// Fold an icmp where both operands are calls to the same LLVM intrinsic.
3595 static Instruction *
3596 foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp,
3597                                InstCombiner::BuilderTy &Builder) {
3598   assert(Cmp.isEquality());
3599 
3600   ICmpInst::Predicate Pred = Cmp.getPredicate();
3601   Value *Op0 = Cmp.getOperand(0);
3602   Value *Op1 = Cmp.getOperand(1);
3603   const auto *IIOp0 = dyn_cast<IntrinsicInst>(Op0);
3604   const auto *IIOp1 = dyn_cast<IntrinsicInst>(Op1);
3605   if (!IIOp0 || !IIOp1 || IIOp0->getIntrinsicID() != IIOp1->getIntrinsicID())
3606     return nullptr;
3607 
3608   switch (IIOp0->getIntrinsicID()) {
3609   case Intrinsic::bswap:
3610   case Intrinsic::bitreverse:
3611     // If both operands are byte-swapped or bit-reversed, just compare the
3612     // original values.
3613     return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
3614   case Intrinsic::fshl:
3615   case Intrinsic::fshr: {
3616     // If both operands are rotated by same amount, just compare the
3617     // original values.
3618     if (IIOp0->getOperand(0) != IIOp0->getOperand(1))
3619       break;
3620     if (IIOp1->getOperand(0) != IIOp1->getOperand(1))
3621       break;
3622     if (IIOp0->getOperand(2) == IIOp1->getOperand(2))
3623       return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
3624 
3625     // rotate(X, AmtX) == rotate(Y, AmtY)
3626     //  -> rotate(X, AmtX - AmtY) == Y
3627     // Do this if either both rotates have one use or if only one has one use
3628     // and AmtX/AmtY are constants.
3629     unsigned OneUses = IIOp0->hasOneUse() + IIOp1->hasOneUse();
3630     if (OneUses == 2 ||
3631         (OneUses == 1 && match(IIOp0->getOperand(2), m_ImmConstant()) &&
3632          match(IIOp1->getOperand(2), m_ImmConstant()))) {
3633       Value *SubAmt =
3634           Builder.CreateSub(IIOp0->getOperand(2), IIOp1->getOperand(2));
3635       Value *CombinedRotate = Builder.CreateIntrinsic(
3636           Op0->getType(), IIOp0->getIntrinsicID(),
3637           {IIOp0->getOperand(0), IIOp0->getOperand(0), SubAmt});
3638       return new ICmpInst(Pred, IIOp1->getOperand(0), CombinedRotate);
3639     }
3640   } break;
3641   default:
3642     break;
3643   }
3644 
3645   return nullptr;
3646 }
3647 
3648 /// Try to fold integer comparisons with a constant operand: icmp Pred X, C
3649 /// where X is some kind of instruction and C may contain undef elements (AllowUndef).
3650 /// TODO: Move more folds which allow undef to this function.
3651 Instruction *
3652 InstCombinerImpl::foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
3653                                                      const APInt &C) {
3654   const ICmpInst::Predicate Pred = Cmp.getPredicate();
3655   if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) {
3656     switch (II->getIntrinsicID()) {
3657     default:
3658       break;
3659     case Intrinsic::fshl:
3660     case Intrinsic::fshr:
3661       if (Cmp.isEquality() && II->getArgOperand(0) == II->getArgOperand(1)) {
3662         // (rot X, ?) == 0/-1 --> X == 0/-1
3663         if (C.isZero() || C.isAllOnes())
3664           return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
3665       }
3666       break;
3667     }
3668   }
3669 
3670   return nullptr;
3671 }
3672 
3673 /// Fold an icmp with BinaryOp and constant operand: icmp Pred BO, C.
3674 Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp,
3675                                                          BinaryOperator *BO,
3676                                                          const APInt &C) {
3677   switch (BO->getOpcode()) {
3678   case Instruction::Xor:
3679     if (Instruction *I = foldICmpXorConstant(Cmp, BO, C))
3680       return I;
3681     break;
3682   case Instruction::And:
3683     if (Instruction *I = foldICmpAndConstant(Cmp, BO, C))
3684       return I;
3685     break;
3686   case Instruction::Or:
3687     if (Instruction *I = foldICmpOrConstant(Cmp, BO, C))
3688       return I;
3689     break;
3690   case Instruction::Mul:
3691     if (Instruction *I = foldICmpMulConstant(Cmp, BO, C))
3692       return I;
3693     break;
3694   case Instruction::Shl:
3695     if (Instruction *I = foldICmpShlConstant(Cmp, BO, C))
3696       return I;
3697     break;
3698   case Instruction::LShr:
3699   case Instruction::AShr:
3700     if (Instruction *I = foldICmpShrConstant(Cmp, BO, C))
3701       return I;
3702     break;
3703   case Instruction::SRem:
3704     if (Instruction *I = foldICmpSRemConstant(Cmp, BO, C))
3705       return I;
3706     break;
3707   case Instruction::UDiv:
3708     if (Instruction *I = foldICmpUDivConstant(Cmp, BO, C))
3709       return I;
3710     [[fallthrough]];
3711   case Instruction::SDiv:
3712     if (Instruction *I = foldICmpDivConstant(Cmp, BO, C))
3713       return I;
3714     break;
3715   case Instruction::Sub:
3716     if (Instruction *I = foldICmpSubConstant(Cmp, BO, C))
3717       return I;
3718     break;
3719   case Instruction::Add:
3720     if (Instruction *I = foldICmpAddConstant(Cmp, BO, C))
3721       return I;
3722     break;
3723   default:
3724     break;
3725   }
3726 
3727   // TODO: These folds could be refactored to be part of the above calls.
3728   return foldICmpBinOpEqualityWithConstant(Cmp, BO, C);
3729 }
3730 
3731 static Instruction *
3732 foldICmpUSubSatOrUAddSatWithConstant(ICmpInst::Predicate Pred,
3733                                      SaturatingInst *II, const APInt &C,
3734                                      InstCombiner::BuilderTy &Builder) {
3735   // This transform may end up producing more than one instruction for the
3736   // intrinsic, so limit it to one user of the intrinsic.
3737   if (!II->hasOneUse())
3738     return nullptr;
3739 
3740   // Let Y        = [add/sub]_sat(X, C) pred C2
3741   //     SatVal   = The saturating value for the operation
3742   //     WillWrap = Whether or not the operation will underflow / overflow
3743   // => Y = (WillWrap ? SatVal : (X binop C)) pred C2
3744   // => Y = WillWrap ? (SatVal pred C2) : ((X binop C) pred C2)
3745   //
3746   // When (SatVal pred C2) is true, then
3747   //    Y = WillWrap ? true : ((X binop C) pred C2)
3748   // => Y = WillWrap || ((X binop C) pred C2)
3749   // else
3750   //    Y =  WillWrap ? false : ((X binop C) pred C2)
3751   // => Y = !WillWrap ?  ((X binop C) pred C2) : false
3752   // => Y = !WillWrap && ((X binop C) pred C2)
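       // A worked i8 example (illustrative): uadd.sat(X, 100) u> 200. SatVal = 255
       // satisfies u> 200, so the result is WillWrap || (X + 100 u> 200), i.e.
       // X u> 155 || X u> 100, which simplifies to X u> 100.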
3753   Value *Op0 = II->getOperand(0);
3754   Value *Op1 = II->getOperand(1);
3755 
3756   const APInt *COp1;
3757   // This transform only works when the intrinsic has an integral constant or
3758   // splat vector as the second operand.
3759   if (!match(Op1, m_APInt(COp1)))
3760     return nullptr;
3761 
3762   APInt SatVal;
3763   switch (II->getIntrinsicID()) {
3764   default:
3765     llvm_unreachable(
3766         "This function only works with usub_sat and uadd_sat for now!");
3767   case Intrinsic::uadd_sat:
3768     SatVal = APInt::getAllOnes(C.getBitWidth());
3769     break;
3770   case Intrinsic::usub_sat:
3771     SatVal = APInt::getZero(C.getBitWidth());
3772     break;
3773   }
3774 
3775   // Check (SatVal pred C2)
3776   bool SatValCheck = ICmpInst::compare(SatVal, C, Pred);
3777 
3778   // !WillWrap.
3779   ConstantRange C1 = ConstantRange::makeExactNoWrapRegion(
3780       II->getBinaryOp(), *COp1, II->getNoWrapKind());
3781 
3782   // WillWrap.
3783   if (SatValCheck)
3784     C1 = C1.inverse();
3785 
3786   ConstantRange C2 = ConstantRange::makeExactICmpRegion(Pred, C);
3787   if (II->getBinaryOp() == Instruction::Add)
3788     C2 = C2.sub(*COp1);
3789   else
3790     C2 = C2.add(*COp1);
3791 
3792   Instruction::BinaryOps CombiningOp =
3793       SatValCheck ? Instruction::BinaryOps::Or : Instruction::BinaryOps::And;
3794 
3795   std::optional<ConstantRange> Combination;
3796   if (CombiningOp == Instruction::BinaryOps::Or)
3797     Combination = C1.exactUnionWith(C2);
3798   else /* CombiningOp == Instruction::BinaryOps::And */
3799     Combination = C1.exactIntersectWith(C2);
3800 
3801   if (!Combination)
3802     return nullptr;
3803 
3804   CmpInst::Predicate EquivPred;
3805   APInt EquivInt;
3806   APInt EquivOffset;
3807 
3808   Combination->getEquivalentICmp(EquivPred, EquivInt, EquivOffset);
3809 
3810   return new ICmpInst(
3811       EquivPred,
3812       Builder.CreateAdd(Op0, ConstantInt::get(Op1->getType(), EquivOffset)),
3813       ConstantInt::get(Op1->getType(), EquivInt));
3814 }
3815 
3816 /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
3817 Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
3818                                                              IntrinsicInst *II,
3819                                                              const APInt &C) {
3820   ICmpInst::Predicate Pred = Cmp.getPredicate();
3821 
3822   // Handle folds that apply for any kind of icmp.
3823   switch (II->getIntrinsicID()) {
3824   default:
3825     break;
3826   case Intrinsic::uadd_sat:
3827   case Intrinsic::usub_sat:
3828     if (auto *Folded = foldICmpUSubSatOrUAddSatWithConstant(
3829             Pred, cast<SaturatingInst>(II), C, Builder))
3830       return Folded;
3831     break;
3832   case Intrinsic::ctpop: {
3833     const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
3834     if (Instruction *R = foldCtpopPow2Test(Cmp, II, C, Builder, Q))
3835       return R;
3836   } break;
3837   }
3838 
3839   if (Cmp.isEquality())
3840     return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
3841 
3842   Type *Ty = II->getType();
3843   unsigned BitWidth = C.getBitWidth();
3844   switch (II->getIntrinsicID()) {
3845   case Intrinsic::ctpop: {
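         // ctpop(X) lies in [0, BitWidth], so these endpoint comparisons are
         // really "all bits of X set" / "not all bits of X set" tests: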
3846     // (ctpop X > BitWidth - 1) --> X == -1
3847     Value *X = II->getArgOperand(0);
3848     if (C == BitWidth - 1 && Pred == ICmpInst::ICMP_UGT)
3849       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, X,
3850                              ConstantInt::getAllOnesValue(Ty));
3851     // (ctpop X < BitWidth) --> X != -1
3852     if (C == BitWidth && Pred == ICmpInst::ICMP_ULT)
3853       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, X,
3854                              ConstantInt::getAllOnesValue(Ty));
3855     break;
3856   }
3857   case Intrinsic::ctlz: {
3858     // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
3859     if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
3860       unsigned Num = C.getLimitedValue();
3861       APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
3862       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
3863                              II->getArgOperand(0), ConstantInt::get(Ty, Limit));
3864     }
3865 
3866     // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
3867     if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) {
3868       unsigned Num = C.getLimitedValue();
3869       APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
3870       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
3871                              II->getArgOperand(0), ConstantInt::get(Ty, Limit));
3872     }
3873     break;
3874   }
3875   case Intrinsic::cttz: {
3876     // Limit to one use to ensure we don't increase instruction count.
3877     if (!II->hasOneUse())
3878       return nullptr;
3879 
3880     // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
3881     if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
3882       APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
3883       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
3884                              Builder.CreateAnd(II->getArgOperand(0), Mask),
3885                              ConstantInt::getNullValue(Ty));
3886     }
3887 
3888     // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
3889     if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) {
3890       APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
3891       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
3892                              Builder.CreateAnd(II->getArgOperand(0), Mask),
3893                              ConstantInt::getNullValue(Ty));
3894     }
3895     break;
3896   }
3897   case Intrinsic::ssub_sat:
3898     // ssub.sat(a, b) spred 0 -> a spred b
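         // Saturation clamps toward INT_MIN/INT_MAX without crossing zero, so
         // ssub.sat(a, b) has the same sign (and zero-ness) as the exact
         // difference a - b; comparing it against 0 is the same as comparing
         // a against b.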
3899     if (ICmpInst::isSigned(Pred)) {
3900       if (C.isZero())
3901         return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1));
3902       // X s<= 0 is canonicalized to X s< 1
3903       if (Pred == ICmpInst::ICMP_SLT && C.isOne())
3904         return new ICmpInst(ICmpInst::ICMP_SLE, II->getArgOperand(0),
3905                             II->getArgOperand(1));
3906       // X s>= 0 is canonicalized to X s> -1
3907       if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
3908         return new ICmpInst(ICmpInst::ICMP_SGE, II->getArgOperand(0),
3909                             II->getArgOperand(1));
3910     }
3911     break;
3912   default:
3913     break;
3914   }
3915 
3916   return nullptr;
3917 }
3918 
3919 /// Handle icmp with constant (but not simple integer constant) RHS.
3920 Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
3921   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3922   Constant *RHSC = dyn_cast<Constant>(Op1);
3923   Instruction *LHSI = dyn_cast<Instruction>(Op0);
3924   if (!RHSC || !LHSI)
3925     return nullptr;
3926 
3927   switch (LHSI->getOpcode()) {
3928   case Instruction::PHI:
3929     if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
3930       return NV;
3931     break;
3932   case Instruction::IntToPtr:
3933     // icmp pred inttoptr(X), null -> icmp pred X, 0
3934     if (RHSC->isNullValue() &&
3935         DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
3936       return new ICmpInst(
3937           I.getPredicate(), LHSI->getOperand(0),
3938           Constant::getNullValue(LHSI->getOperand(0)->getType()));
3939     break;
3940 
3941   case Instruction::Load:
3942     // Try to optimize things like "A[i] > 4" to index computations.
3943     if (GetElementPtrInst *GEP =
3944             dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
3945       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
3946         if (Instruction *Res =
3947                 foldCmpLoadFromIndexedGlobal(cast<LoadInst>(LHSI), GEP, GV, I))
3948           return Res;
3949     break;
3950   }
3951 
3952   return nullptr;
3953 }
3954 
3955 Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
3956                                               SelectInst *SI, Value *RHS,
3957                                               const ICmpInst &I) {
3958   // Try to fold the comparison into the select arms, which will cause the
3959   // select to be converted into a logical and/or.
3960   auto SimplifyOp = [&](Value *Op, bool SelectCondIsTrue) -> Value * {
3961     if (Value *Res = simplifyICmpInst(Pred, Op, RHS, SQ))
3962       return Res;
3963     if (std::optional<bool> Impl = isImpliedCondition(
3964             SI->getCondition(), Pred, Op, RHS, DL, SelectCondIsTrue))
3965       return ConstantInt::get(I.getType(), *Impl);
3966     return nullptr;
3967   };
3968 
3969   ConstantInt *CI = nullptr;
3970   Value *Op1 = SimplifyOp(SI->getOperand(1), true);
3971   if (Op1)
3972     CI = dyn_cast<ConstantInt>(Op1);
3973 
3974   Value *Op2 = SimplifyOp(SI->getOperand(2), false);
3975   if (Op2)
3976     CI = dyn_cast<ConstantInt>(Op2);
3977 
3978   // We only want to perform this transformation if it will not lead to
3979   // additional code. This holds if both sides of the select fold to a
3980   // constant (in which case the icmp is replaced with a select, which
3981   // will usually simplify), if this is the only user of the select (in
3982   // which case we trade one select+icmp pair for another, simpler one),
3983   // or if all uses of the select can be replaced based on dominance
3984   // information ("Global cases").
3985   bool Transform = false;
3986   if (Op1 && Op2)
3987     Transform = true;
3988   else if (Op1 || Op2) {
3989     // Local case
3990     if (SI->hasOneUse())
3991       Transform = true;
3992     // Global cases
3993     else if (CI && !CI->isZero())
3994       // When Op1 is constant try replacing select with second operand.
3995       // Otherwise Op2 is constant and try replacing select with first
3996       // operand.
3997       Transform = replacedSelectWithOperand(SI, &I, Op1 ? 2 : 1);
3998   }
3999   if (Transform) {
4000     if (!Op1)
4001       Op1 = Builder.CreateICmp(Pred, SI->getOperand(1), RHS, I.getName());
4002     if (!Op2)
4003       Op2 = Builder.CreateICmp(Pred, SI->getOperand(2), RHS, I.getName());
4004     return SelectInst::Create(SI->getOperand(0), Op1, Op2);
4005   }
4006 
4007   return nullptr;
4008 }
4009 
4010 /// Some comparisons can be simplified.
4011 /// In this case, we are looking for comparisons that look like
4012 /// a check for a lossy truncation.
4013 /// Folds:
4014 ///   icmp SrcPred (x & Mask), x    to    icmp DstPred x, Mask
4015 /// Where Mask is some pattern that produces all-ones in low bits:
4016 ///    (-1 >> y)
4017 ///    ((-1 << y) >> y)     <- non-canonical, has extra uses
4018 ///   ~(-1 << y)
4019 ///    ((1 << y) + (-1))    <- non-canonical, has extra uses
4020 /// The Mask can be a constant, too.
4021 /// For some predicates, the operands are commutative.
4022 /// For others, x can only be on a specific side.
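     ///
     /// An illustrative example (i8): x & 0x0f == x folds to x u<= 0x0f, since
     /// the masked value equals x exactly when x has no bits above the mask.
     /// For the signed predicates the mask must also be a non-negative
     /// constant; e.g. with M == -1 the would-be fold "x s> M" is wrong.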
4023 static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
4024                                           InstCombiner::BuilderTy &Builder) {
4025   ICmpInst::Predicate SrcPred;
4026   Value *X, *M, *Y;
4027   auto m_VariableMask = m_CombineOr(
4028       m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
4029                   m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
4030       m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
4031                   m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
4032   auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
4033   if (!match(&I, m_c_ICmp(SrcPred,
4034                           m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
4035                           m_Deferred(X))))
4036     return nullptr;
4037 
4038   ICmpInst::Predicate DstPred;
4039   switch (SrcPred) {
4040   case ICmpInst::Predicate::ICMP_EQ:
4041     //  x & (-1 >> y) == x    ->    x u<= (-1 >> y)
4042     DstPred = ICmpInst::Predicate::ICMP_ULE;
4043     break;
4044   case ICmpInst::Predicate::ICMP_NE:
4045     //  x & (-1 >> y) != x    ->    x u> (-1 >> y)
4046     DstPred = ICmpInst::Predicate::ICMP_UGT;
4047     break;
4048   case ICmpInst::Predicate::ICMP_ULT:
4049     //  x & (-1 >> y) u< x    ->    x u> (-1 >> y)
4050     //  x u> x & (-1 >> y)    ->    x u> (-1 >> y)
4051     DstPred = ICmpInst::Predicate::ICMP_UGT;
4052     break;
4053   case ICmpInst::Predicate::ICMP_UGE:
4054     //  x & (-1 >> y) u>= x    ->    x u<= (-1 >> y)
4055     //  x u<= x & (-1 >> y)    ->    x u<= (-1 >> y)
4056     DstPred = ICmpInst::Predicate::ICMP_ULE;
4057     break;
4058   case ICmpInst::Predicate::ICMP_SLT:
4059     //  x & (-1 >> y) s< x    ->    x s> (-1 >> y)
4060     //  x s> x & (-1 >> y)    ->    x s> (-1 >> y)
4061     if (!match(M, m_Constant())) // Cannot do this fold with a non-constant.
4062       return nullptr;
4063     if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
4064       return nullptr;
4065     DstPred = ICmpInst::Predicate::ICMP_SGT;
4066     break;
4067   case ICmpInst::Predicate::ICMP_SGE:
4068     //  x & (-1 >> y) s>= x    ->    x s<= (-1 >> y)
4069     //  x s<= x & (-1 >> y)    ->    x s<= (-1 >> y)
4070     if (!match(M, m_Constant())) // Cannot do this fold with a non-constant.
4071       return nullptr;
4072     if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
4073       return nullptr;
4074     DstPred = ICmpInst::Predicate::ICMP_SLE;
4075     break;
4076   case ICmpInst::Predicate::ICMP_SGT:
4077   case ICmpInst::Predicate::ICMP_SLE:
4078     return nullptr;
4079   case ICmpInst::Predicate::ICMP_UGT:
4080   case ICmpInst::Predicate::ICMP_ULE:
4081     llvm_unreachable("Instsimplify took care of the commutative variant");
4082     break;
4083   default:
4084     llvm_unreachable("All possible folds are handled.");
4085   }
4086 
4087   // The mask value may be a vector constant that has undefined elements. But it
4088   // may not be safe to propagate those undefs into the new compare, so replace
4089   // those elements by copying an existing, defined, and safe scalar constant.
4090   Type *OpTy = M->getType();
4091   auto *VecC = dyn_cast<Constant>(M);
4092   auto *OpVTy = dyn_cast<FixedVectorType>(OpTy);
4093   if (OpVTy && VecC && VecC->containsUndefOrPoisonElement()) {
4094     Constant *SafeReplacementConstant = nullptr;
4095     for (unsigned i = 0, e = OpVTy->getNumElements(); i != e; ++i) {
4096       if (!isa<UndefValue>(VecC->getAggregateElement(i))) {
4097         SafeReplacementConstant = VecC->getAggregateElement(i);
4098         break;
4099       }
4100     }
4101     assert(SafeReplacementConstant && "Failed to find undef replacement");
4102     M = Constant::replaceUndefsWith(VecC, SafeReplacementConstant);
4103   }
4104 
4105   return Builder.CreateICmp(DstPred, X, M);
4106 }
4107 
4108 /// Some comparisons can be simplified.
4109 /// In this case, we are looking for comparisons that look like
4110 /// a check for a lossy signed truncation.
4111 /// Folds:   (MaskedBits is a constant.)
4112 ///   ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
4113 /// Into:
4114 ///   (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
4115 /// Where  KeptBits = bitwidth(%x) - MaskedBits
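     ///
     /// An illustrative example (i8, MaskedBits = 4, KeptBits = 4):
     ///   ((%x << 4) a>> 4) == %x  holds iff %x fits in 4 signed bits, i.e.
     ///   %x is in [-8, 7], which is exactly (add %x, 8) u< 16.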
4116 static Value *
4117 foldICmpWithTruncSignExtendedVal(ICmpInst &I,
4118                                  InstCombiner::BuilderTy &Builder) {
4119   ICmpInst::Predicate SrcPred;
4120   Value *X;
4121   const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
4122   // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
4123   if (!match(&I, m_c_ICmp(SrcPred,
4124                           m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
4125                                           m_APInt(C1))),
4126                           m_Deferred(X))))
4127     return nullptr;
4128 
4129   // Potential handling of non-splats: for each element:
4130   //  * if both are undef, replace with constant 0.
4131   //    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
4132   //  * if both are not undef and are different, bail out.
4133   //  * else, exactly one is undef; pick the non-undef one.
4134 
4135   // The shift amount must be equal.
4136   if (*C0 != *C1)
4137     return nullptr;
4138   const APInt &MaskedBits = *C0;
4139   assert(MaskedBits != 0 && "shift by zero should be folded away already.");
4140 
4141   ICmpInst::Predicate DstPred;
4142   switch (SrcPred) {
4143   case ICmpInst::Predicate::ICMP_EQ:
4144     // ((%x << MaskedBits) a>> MaskedBits) == %x
4145     //   =>
4146     // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
4147     DstPred = ICmpInst::Predicate::ICMP_ULT;
4148     break;
4149   case ICmpInst::Predicate::ICMP_NE:
4150     // ((%x << MaskedBits) a>> MaskedBits) != %x
4151     //   =>
4152     // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
4153     DstPred = ICmpInst::Predicate::ICMP_UGE;
4154     break;
4155   // FIXME: are more folds possible?
4156   default:
4157     return nullptr;
4158   }
4159 
4160   auto *XType = X->getType();
4161   const unsigned XBitWidth = XType->getScalarSizeInBits();
4162   const APInt BitWidth = APInt(XBitWidth, XBitWidth);
4163   assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
4164 
4165   // KeptBits = bitwidth(%x) - MaskedBits
4166   const APInt KeptBits = BitWidth - MaskedBits;
4167   assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
4168   // ICmpCst = (1 << KeptBits)
4169   const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
4170   assert(ICmpCst.isPowerOf2());
4171   // AddCst = (1 << (KeptBits-1))
4172   const APInt AddCst = ICmpCst.lshr(1);
4173   assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
4174 
4175   // T0 = add %x, AddCst
4176   Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
4177   // T1 = T0 DstPred ICmpCst
4178   Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
4179 
4180   return T1;
4181 }
4182 
4183 // Given pattern:
4184 //   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
4185 // we should move shifts to the same hand of 'and', i.e. rewrite as
4186 //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
4187 // We are only interested in opposite logical shifts here.
4188 // One of the shifts can be truncated.
4189 // If we can, we want to end up creating 'lshr' shift.
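     //
     // An illustrative example (i8):
     //   icmp eq (and (shl %x, 3), (lshr %y, 2)), 0
     //     -> icmp eq (and (lshr %y, 5), %x), 0
     // Both forms test the same bit pairs of %x and %y, and 3+2 == 5 u< 8.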
4190 static Value *
4191 foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
4192                                            InstCombiner::BuilderTy &Builder) {
4193   if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
4194       !I.getOperand(0)->hasOneUse())
4195     return nullptr;
4196 
4197   auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
4198 
4199   // Look for an 'and' of two logical shifts, one of which may be truncated.
4200   // We use m_TruncOrSelf() on the RHS to handle the commutative case correctly.
4201   Instruction *XShift, *MaybeTruncation, *YShift;
4202   if (!match(
4203           I.getOperand(0),
4204           m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
4205                   m_CombineAnd(m_TruncOrSelf(m_CombineAnd(
4206                                    m_AnyLogicalShift, m_Instruction(YShift))),
4207                                m_Instruction(MaybeTruncation)))))
4208     return nullptr;
4209 
4210   // We potentially looked past 'trunc', but only when matching YShift;
4211   // therefore YShift must have the widest type.
4212   Instruction *WidestShift = YShift;
4213   // XShift must therefore have the narrowest type (or they both have
4214   // identical types if there was no truncation).
4215   Instruction *NarrowestShift = XShift;
4216 
4217   Type *WidestTy = WidestShift->getType();
4218   Type *NarrowestTy = NarrowestShift->getType();
4219   assert(NarrowestTy == I.getOperand(0)->getType() &&
4220          "We did not look past any shifts while matching XShift though.");
4221   bool HadTrunc = WidestTy != I.getOperand(0)->getType();
4222 
4223   // If YShift is a 'lshr', swap the shifts around.
4224   if (match(YShift, m_LShr(m_Value(), m_Value())))
4225     std::swap(XShift, YShift);
4226 
4227   // The shifts must be in opposite directions.
4228   auto XShiftOpcode = XShift->getOpcode();
4229   if (XShiftOpcode == YShift->getOpcode())
4230     return nullptr; // Do not care about same-direction shifts here.
4231 
4232   Value *X, *XShAmt, *Y, *YShAmt;
4233   match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt))));
4234   match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt))));
4235 
4236   // If one of the values being shifted is a constant, we will end up with
4237   // and+icmp, and the [zext+]shift instrs will be constant-folded. If they
4238   // are not, however, we must ensure we won't increase the instruction count.
4239   if (!isa<Constant>(X) && !isa<Constant>(Y)) {
4240     // At least one of the hands of the 'and' should be one-use shift.
4241     if (!match(I.getOperand(0),
4242                m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
4243       return nullptr;
4244     if (HadTrunc) {
4245       // Due to the 'trunc', we will need to widen X. For that either the old
4246       // 'trunc' or the shift amt in the non-truncated shift should be one-use.
4247       if (!MaybeTruncation->hasOneUse() &&
4248           !NarrowestShift->getOperand(1)->hasOneUse())
4249         return nullptr;
4250     }
4251   }
4252 
4253   // We have two shift amounts from two different shifts. The types of those
4254   // shift amounts may not match. If that's the case let's bailout now.
4255   if (XShAmt->getType() != YShAmt->getType())
4256     return nullptr;
4257 
4258   // As input, we have the following pattern:
4259   //   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
4260   // We want to rewrite that as:
4261   //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
4262   // While we know that originally (Q+K) would not overflow
4263   // (because 2 * (N-1) u<= iN - 1), we have looked past zero-extensions of
4264   // the shift amounts, so the sum may now overflow in a smaller bit width.
4265   // To rule that out, we check that the total maximal shift amount is
4266   // still representable in that smaller bit width.
4267   unsigned MaximalPossibleTotalShiftAmount =
4268       (WidestTy->getScalarSizeInBits() - 1) +
4269       (NarrowestTy->getScalarSizeInBits() - 1);
4270   APInt MaximalRepresentableShiftAmount =
4271       APInt::getAllOnes(XShAmt->getType()->getScalarSizeInBits());
4272   if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
4273     return nullptr;
4274 
4275   // Can we fold (XShAmt+YShAmt) ?
4276   auto *NewShAmt = dyn_cast_or_null<Constant>(
4277       simplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
4278                       /*isNUW=*/false, SQ.getWithInstruction(&I)));
4279   if (!NewShAmt)
4280     return nullptr;
4281   if (NewShAmt->getType() != WidestTy) {
4282     NewShAmt =
4283         ConstantFoldCastOperand(Instruction::ZExt, NewShAmt, WidestTy, SQ.DL);
4284     if (!NewShAmt)
4285       return nullptr;
4286   }
4287   unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();
4288 
4289   // Is the new shift amount smaller than the bit width?
4290   // FIXME: could also rely on ConstantRange.
4291   if (!match(NewShAmt,
4292              m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
4293                                 APInt(WidestBitWidth, WidestBitWidth))))
4294     return nullptr;
4295 
4296   // An extra legality check is needed if we had trunc-of-lshr.
4297   if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) {
4298     auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ,
4299                     WidestShift]() {
4300       // It isn't obvious whether it's worth it to analyze non-constants here.
4301       // Also, let's basically give up on non-splat cases, pessimizing vectors.
4302       // If *any* of these preconditions matches we can perform the fold.
4303       Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy()
4304                                     ? NewShAmt->getSplatValue()
4305                                     : NewShAmt;
4306       // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold.
4307       if (NewShAmtSplat &&
4308           (NewShAmtSplat->isNullValue() ||
4309            NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1))
4310         return true;
4311       // We consider *min* leading zeros so a single outlier
4312       // blocks the transform as opposed to allowing it.
4313       if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) {
4314         KnownBits Known = computeKnownBits(C, SQ.DL);
4315         unsigned MinLeadZero = Known.countMinLeadingZeros();
4316         // If the value being shifted has at most the lowest bit set, we can fold.
4317         unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
4318         if (MaxActiveBits <= 1)
4319           return true;
4320         // Precondition:  NewShAmt u<= countLeadingZeros(C)
4321         if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero))
4322           return true;
4323       }
4324       if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) {
4325         KnownBits Known = computeKnownBits(C, SQ.DL);
4326         unsigned MinLeadZero = Known.countMinLeadingZeros();
4327         // If the value being shifted has at most the lowest bit set, we can fold.
4328         unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
4329         if (MaxActiveBits <= 1)
4330           return true;
4331         // Precondition:  ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C)
4332         if (NewShAmtSplat) {
4333           APInt AdjNewShAmt =
4334               (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger();
4335           if (AdjNewShAmt.ule(MinLeadZero))
4336             return true;
4337         }
4338       }
4339       return false; // Can't tell if it's ok.
4340     };
4341     if (!CanFold())
4342       return nullptr;
4343   }
4344 
4345   // All good, we can do this fold.
4346   X = Builder.CreateZExt(X, WidestTy);
4347   Y = Builder.CreateZExt(Y, WidestTy);
4348   // The shift is the same that was for X.
4349   Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
4350                   ? Builder.CreateLShr(X, NewShAmt)
4351                   : Builder.CreateShl(X, NewShAmt);
4352   Value *T1 = Builder.CreateAnd(T0, Y);
4353   return Builder.CreateICmp(I.getPredicate(), T1,
4354                             Constant::getNullValue(WidestTy));
4355 }
4356 
4357 /// Fold
4358 ///   (-1 u/ x) u< y
4359 ///   ((x * y) ?/ x) != y
4360 /// to
4361 ///   @llvm.?mul.with.overflow(x, y) plus extraction of overflow bit
4362 /// Note that the comparison is commutative, and that the inverted predicates
4363 /// (u>=, ==) mean that we are looking for the opposite answer (no overflow).
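     /// An illustrative example (i8): for x == 16, (-1 u/ x) == 255 u/ 16 == 15,
     /// and 15 u< y holds exactly when 16 * y u> 255, i.e. when x * y overflows.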
4364 Value *InstCombinerImpl::foldMultiplicationOverflowCheck(ICmpInst &I) {
4365   ICmpInst::Predicate Pred;
4366   Value *X, *Y;
4367   Instruction *Mul;
4368   Instruction *Div;
4369   bool NeedNegation;
4370   // Look for: (-1 u/ x) u</u>= y
4371   if (!I.isEquality() &&
4372       match(&I, m_c_ICmp(Pred,
4373                          m_CombineAnd(m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
4374                                       m_Instruction(Div)),
4375                          m_Value(Y)))) {
4376     Mul = nullptr;
4377 
4378     // Are we checking that overflow does not happen, or does happen?
4379     switch (Pred) {
4380     case ICmpInst::Predicate::ICMP_ULT:
4381       NeedNegation = false;
4382       break; // OK
4383     case ICmpInst::Predicate::ICMP_UGE:
4384       NeedNegation = true;
4385       break; // OK
4386     default:
4387       return nullptr; // Wrong predicate.
4388     }
4389   } else // Look for: ((x * y) / x) !=/== y
4390       if (I.isEquality() &&
4391           match(&I,
4392                 m_c_ICmp(Pred, m_Value(Y),
4393                          m_CombineAnd(
4394                              m_OneUse(m_IDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y),
4395                                                                   m_Value(X)),
4396                                                           m_Instruction(Mul)),
4397                                              m_Deferred(X))),
4398                              m_Instruction(Div))))) {
4399     NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ;
4400   } else
4401     return nullptr;
4402 
4403   BuilderTy::InsertPointGuard Guard(Builder);
4404   // If the pattern included (x * y), we'll want to insert new instructions
4405   // right before that original multiplication so that we can replace it.
4406   bool MulHadOtherUses = Mul && !Mul->hasOneUse();
4407   if (MulHadOtherUses)
4408     Builder.SetInsertPoint(Mul);
4409 
4410   Function *F = Intrinsic::getDeclaration(I.getModule(),
4411                                           Div->getOpcode() == Instruction::UDiv
4412                                               ? Intrinsic::umul_with_overflow
4413                                               : Intrinsic::smul_with_overflow,
4414                                           X->getType());
4415   CallInst *Call = Builder.CreateCall(F, {X, Y}, "mul");
4416 
4417   // If the multiplication was used elsewhere, to ensure that we don't leave
4418   // "duplicate" instructions, replace uses of that original multiplication
4419   // with the multiplication result from the with.overflow intrinsic.
4420   if (MulHadOtherUses)
4421     replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "mul.val"));
4422 
4423   Value *Res = Builder.CreateExtractValue(Call, 1, "mul.ov");
4424   if (NeedNegation) // This technically increases instruction count.
4425     Res = Builder.CreateNot(Res, "mul.not.ov");
4426 
4427   // If we replaced the mul, erase it. Do this after all uses of Builder,
4428   // as the mul is used as insertion point.
4429   if (MulHadOtherUses)
4430     eraseInstFromFunction(*Mul);
4431 
4432   return Res;
4433 }
4434 
4435 static Instruction *foldICmpXNegX(ICmpInst &I,
4436                                   InstCombiner::BuilderTy &Builder) {
4437   CmpInst::Predicate Pred;
4438   Value *X;
4439   if (match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X)))) {
4440 
4441     if (ICmpInst::isSigned(Pred))
4442       Pred = ICmpInst::getSwappedPredicate(Pred);
4443     else if (ICmpInst::isUnsigned(Pred))
4444       Pred = ICmpInst::getSignedPredicate(Pred);
4445     // else for equality-comparisons just keep the predicate.
4446 
4447     return ICmpInst::Create(Instruction::ICmp, Pred, X,
4448                             Constant::getNullValue(X->getType()), I.getName());
4449   }
4450 
4451   // A value is not equal to its negation unless that value is 0 or
4452   // MinSignedValue, i.e.: a != -a --> (a & MaxSignedVal) != 0
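       // (For i8: a == -a only for 0 and -128, exactly the two values with
       // bits 0..6 all clear.)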
4453   if (match(&I, m_c_ICmp(Pred, m_OneUse(m_Neg(m_Value(X))), m_Deferred(X))) &&
4454       ICmpInst::isEquality(Pred)) {
4455     Type *Ty = X->getType();
4456     uint32_t BitWidth = Ty->getScalarSizeInBits();
4457     Constant *MaxSignedVal =
4458         ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth));
4459     Value *And = Builder.CreateAnd(X, MaxSignedVal);
4460     Constant *Zero = Constant::getNullValue(Ty);
4461     return CmpInst::Create(Instruction::ICmp, Pred, And, Zero);
4462   }
4463 
4464   return nullptr;
4465 }
4466 
4467 static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
4468                                   InstCombinerImpl &IC) {
4469   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
4470   // Normalize the 'and' to be operand 0.
4471   CmpInst::Predicate Pred = I.getPredicate();
4472   if (match(Op1, m_c_And(m_Specific(Op0), m_Value()))) {
4473     std::swap(Op0, Op1);
4474     Pred = ICmpInst::getSwappedPredicate(Pred);
4475   }
4476 
4477   if (!match(Op0, m_c_And(m_Specific(Op1), m_Value(A))))
4478     return nullptr;
4479 
4480   // icmp (X & Y) u< X --> (X & Y) != X
4481   if (Pred == ICmpInst::ICMP_ULT)
4482     return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4483 
4484   // icmp (X & Y) u>= X --> (X & Y) == X
4485   if (Pred == ICmpInst::ICMP_UGE)
4486     return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4487 
4488   return nullptr;
4489 }
4490 
4491 static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
4492                                  InstCombinerImpl &IC) {
4493   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
4494 
4495   // Normalize the 'or' to be operand 0.
4496   CmpInst::Predicate Pred = I.getPredicate();
4497   if (match(Op1, m_c_Or(m_Specific(Op0), m_Value(A)))) {
4498     std::swap(Op0, Op1);
4499     Pred = ICmpInst::getSwappedPredicate(Pred);
4500   } else if (!match(Op0, m_c_Or(m_Specific(Op1), m_Value(A)))) {
4501     return nullptr;
4502   }
4503 
4504   // icmp (X | Y) u<= X --> (X | Y) == X
4505   if (Pred == ICmpInst::ICMP_ULE)
4506     return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4507 
4508   // icmp (X | Y) u> X --> (X | Y) != X
4509   if (Pred == ICmpInst::ICMP_UGT)
4510     return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4511 
4512   if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
4513     // icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible
4514     if (Value *NotOp1 =
4515             IC.getFreelyInverted(Op1, Op1->hasOneUse(), &IC.Builder))
4516       return new ICmpInst(Pred, IC.Builder.CreateAnd(A, NotOp1),
4517                           Constant::getNullValue(Op1->getType()));
4518     // icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X  is freely invertible.
4519     if (Value *NotA = IC.getFreelyInverted(A, A->hasOneUse(), &IC.Builder))
4520       return new ICmpInst(Pred, IC.Builder.CreateOr(Op1, NotA),
4521                           Constant::getAllOnesValue(Op1->getType()));
4522   }
4523   return nullptr;
4524 }
4525 
4526 static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
4527                                   InstCombinerImpl &IC) {
4528   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
4529   // Normalize the 'xor' to be operand 0.
4530   CmpInst::Predicate Pred = I.getPredicate();
4531   if (match(Op1, m_c_Xor(m_Specific(Op0), m_Value()))) {
4532     std::swap(Op0, Op1);
4533     Pred = ICmpInst::getSwappedPredicate(Pred);
4534   }
4535   if (!match(Op0, m_c_Xor(m_Specific(Op1), m_Value(A))))
4536     return nullptr;
4537 
4538   // icmp (X ^ Y_NonZero) u>= X --> icmp (X ^ Y_NonZero) u> X
4539   // icmp (X ^ Y_NonZero) u<= X --> icmp (X ^ Y_NonZero) u< X
4540   // icmp (X ^ Y_NonZero) s>= X --> icmp (X ^ Y_NonZero) s> X
4541   // icmp (X ^ Y_NonZero) s<= X --> icmp (X ^ Y_NonZero) s< X
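       // This is sound because X ^ Y == X only when Y == 0; with Y known
       // non-zero, the equality half of the non-strict predicate cannot hold.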
4542   CmpInst::Predicate PredOut = CmpInst::getStrictPredicate(Pred);
4543   if (PredOut != Pred &&
4544       isKnownNonZero(A, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
4545     return new ICmpInst(PredOut, Op0, Op1);
4546 
4547   return nullptr;
4548 }
4549 
4550 /// Try to fold icmp (binop), X or icmp X, (binop).
4551 /// TODO: A large part of this logic is duplicated in InstSimplify's
4552 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
4553 /// duplication.
4554 Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
4555                                              const SimplifyQuery &SQ) {
4556   const SimplifyQuery Q = SQ.getWithInstruction(&I);
4557   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4558 
4559   // Special logic for binary operators.
4560   BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
4561   BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
4562   if (!BO0 && !BO1)
4563     return nullptr;
4564 
4565   if (Instruction *NewICmp = foldICmpXNegX(I, Builder))
4566     return NewICmp;
4567 
4568   const CmpInst::Predicate Pred = I.getPredicate();
4569   Value *X;
4570 
4571   // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
4572   // (Op1 + X) u</u>= Op1 --> ~Op1 u</u>= X
4573   if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
4574       (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE))
4575     return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
4576   // Op0 u>/u<= (Op0 + X) --> X u>/u<= ~Op0
4577   if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
4578       (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
4579     return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
4580 
4581   {
4582     // (Op1 + X) + C u</u>= Op1 --> ~C - X u</u>= Op1
4583     Constant *C;
4584     if (match(Op0, m_OneUse(m_Add(m_c_Add(m_Specific(Op1), m_Value(X)),
4585                                   m_ImmConstant(C)))) &&
4586         (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
4587       Constant *C2 = ConstantExpr::getNot(C);
4588       return new ICmpInst(Pred, Builder.CreateSub(C2, X), Op1);
4589     }
4590     // Op0 u>/u<= (Op0 + X) + C --> Op0 u>/u<= ~C - X
4591     if (match(Op1, m_OneUse(m_Add(m_c_Add(m_Specific(Op0), m_Value(X)),
4592                                   m_ImmConstant(C)))) &&
4593         (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) {
4594       Constant *C2 = ConstantExpr::getNot(C);
4595       return new ICmpInst(Pred, Op0, Builder.CreateSub(C2, X));
4596     }
4597   }
4598 
4599   {
4600     // Similar to above: an unsigned overflow comparison may use offset + mask:
4601     // ((Op1 + C) & C) u<  Op1 --> Op1 != 0
4602     // ((Op1 + C) & C) u>= Op1 --> Op1 == 0
4603     // Op0 u>  ((Op0 + C) & C) --> Op0 != 0
4604     // Op0 u<= ((Op0 + C) & C) --> Op0 == 0
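         // Why this works (C == 2^k - 1): for Op1 == 0 the masked sum is C and
         // C u< 0 is false; for Op1 != 0 it is ((Op1 mod 2^k) - 1) mod 2^k,
         // which is always u< Op1.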
4605     BinaryOperator *BO;
4606     const APInt *C;
4607     if ((Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) &&
4608         match(Op0, m_And(m_BinOp(BO), m_LowBitMask(C))) &&
4609         match(BO, m_Add(m_Specific(Op1), m_SpecificIntAllowUndef(*C)))) {
4610       CmpInst::Predicate NewPred =
4611           Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
4612       Constant *Zero = ConstantInt::getNullValue(Op1->getType());
4613       return new ICmpInst(NewPred, Op1, Zero);
4614     }
4615 
4616     if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) &&
4617         match(Op1, m_And(m_BinOp(BO), m_LowBitMask(C))) &&
4618         match(BO, m_Add(m_Specific(Op0), m_SpecificIntAllowUndef(*C)))) {
4619       CmpInst::Predicate NewPred =
4620           Pred == ICmpInst::ICMP_UGT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
4621       Constant *Zero = ConstantInt::getNullValue(Op1->getType());
4622       return new ICmpInst(NewPred, Op0, Zero);
4623     }
4624   }
4625 
4626   bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
4627   if (BO0 && isa<OverflowingBinaryOperator>(BO0))
4628     NoOp0WrapProblem =
4629         ICmpInst::isEquality(Pred) ||
4630         (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
4631         (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
4632   if (BO1 && isa<OverflowingBinaryOperator>(BO1))
4633     NoOp1WrapProblem =
4634         ICmpInst::isEquality(Pred) ||
4635         (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
4636         (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
4637 
4638   // Analyze the case when either Op0 or Op1 is an add instruction.
4639   // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
4640   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
4641   if (BO0 && BO0->getOpcode() == Instruction::Add) {
4642     A = BO0->getOperand(0);
4643     B = BO0->getOperand(1);
4644   }
4645   if (BO1 && BO1->getOpcode() == Instruction::Add) {
4646     C = BO1->getOperand(0);
4647     D = BO1->getOperand(1);
4648   }
4649 
4650   // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
4651   // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow.
4652   if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
4653     return new ICmpInst(Pred, A == Op1 ? B : A,
4654                         Constant::getNullValue(Op1->getType()));
4655 
4656   // icmp C, (C+D) -> icmp 0, D for equalities or if there is no overflow.
4657   // icmp D, (C+D) -> icmp 0, C for equalities or if there is no overflow.
4658   if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
4659     return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
4660                         C == Op0 ? D : C);
4661 
4662   // icmp (A+B), (A+D) -> icmp B, D for equalities or if there is no overflow.
4663   if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
4664       NoOp1WrapProblem) {
4665     // Determine Y and Z in the form icmp (X+Y), (X+Z).
4666     Value *Y, *Z;
4667     if (A == C) {
4668       // C + B == C + D  ->  B == D
4669       Y = B;
4670       Z = D;
4671     } else if (A == D) {
4672       // D + B == C + D  ->  B == C
4673       Y = B;
4674       Z = C;
4675     } else if (B == C) {
4676       // A + C == C + D  ->  A == D
4677       Y = A;
4678       Z = D;
4679     } else {
4680       assert(B == D);
4681       // A + D == C + D  ->  A == C
4682       Y = A;
4683       Z = C;
4684     }
4685     return new ICmpInst(Pred, Y, Z);
4686   }
4687 
4688   // icmp slt (A + -1), Op1 -> icmp sle A, Op1
4689   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
4690       match(B, m_AllOnes()))
4691     return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
4692 
4693   // icmp sge (A + -1), Op1 -> icmp sgt A, Op1
4694   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
4695       match(B, m_AllOnes()))
4696     return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
4697 
4698   // icmp sle (A + 1), Op1 -> icmp slt A, Op1
4699   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
4700     return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
4701 
4702   // icmp sgt (A + 1), Op1 -> icmp sge A, Op1
4703   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
4704     return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
4705 
4706   // icmp sgt Op0, (C + -1) -> icmp sge Op0, C
4707   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
4708       match(D, m_AllOnes()))
4709     return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
4710 
4711   // icmp sle Op0, (C + -1) -> icmp slt Op0, C
4712   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
4713       match(D, m_AllOnes()))
4714     return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
4715 
4716   // icmp sge Op0, (C + 1) -> icmp sgt Op0, C
4717   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
4718     return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
4719 
4720   // icmp slt Op0, (C + 1) -> icmp sle Op0, C
4721   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
4722     return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
4723 
4724   // TODO: The subtraction-related identities shown below also hold, but
4725   // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
4726   // wouldn't happen even if they were implemented.
4727   //
4728   // icmp ult (A - 1), Op1 -> icmp ule A, Op1
4729   // icmp uge (A - 1), Op1 -> icmp ugt A, Op1
4730   // icmp ugt Op0, (C - 1) -> icmp uge Op0, C
4731   // icmp ule Op0, (C - 1) -> icmp ult Op0, C
4732 
4733   // icmp ule (A + 1), Op1 -> icmp ult A, Op1
4734   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
4735     return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
4736 
4737   // icmp ugt (A + 1), Op1 -> icmp uge A, Op1
4738   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
4739     return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
4740 
4741   // icmp uge Op0, (C + 1) -> icmp ugt Op0, C
4742   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
4743     return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
4744 
4745   // icmp ult Op0, (C + 1) -> icmp ule Op0, C
4746   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
4747     return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
4748 
4749   // if C1 has greater magnitude than C2:
4750   //  icmp (A + C1), (C + C2) -> icmp (A + C3), C
4751   //  s.t. C3 = C1 - C2
4752   //
4753   // if C2 has greater magnitude than C1:
4754   //  icmp (A + C1), (C + C2) -> icmp A, (C + C3)
4755   //  s.t. C3 = C2 - C1
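       //
       // e.g. with nsw on both adds: (A + 7) s< (C + 3) --> (A + 4) s< C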
4756   if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
4757       (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) {
4758     const APInt *AP1, *AP2;
4759     // TODO: Support non-uniform vectors.
4760     // TODO: Allow undef passthrough if B AND D's element is undef.
4761     if (match(B, m_APIntAllowUndef(AP1)) && match(D, m_APIntAllowUndef(AP2)) &&
4762         AP1->isNegative() == AP2->isNegative()) {
4763       APInt AP1Abs = AP1->abs();
4764       APInt AP2Abs = AP2->abs();
4765       if (AP1Abs.uge(AP2Abs)) {
4766         APInt Diff = *AP1 - *AP2;
4767         bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
4768         bool HasNSW = BO0->hasNoSignedWrap();
4769         Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
4770         Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
4771         return new ICmpInst(Pred, NewAdd, C);
4772       } else {
4773         APInt Diff = *AP2 - *AP1;
4774         bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
4775         bool HasNSW = BO1->hasNoSignedWrap();
4776         Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
4777         Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
4778         return new ICmpInst(Pred, A, NewAdd);
4779       }
4780     }
4781     Constant *Cst1, *Cst2;
4782     if (match(B, m_ImmConstant(Cst1)) && match(D, m_ImmConstant(Cst2)) &&
4783         ICmpInst::isEquality(Pred)) {
4784       Constant *Diff = ConstantExpr::getSub(Cst2, Cst1);
4785       Value *NewAdd = Builder.CreateAdd(C, Diff);
4786       return new ICmpInst(Pred, A, NewAdd);
4787     }
4788   }
4789 
4790   // Analyze the case when either Op0 or Op1 is a sub instruction.
4791   // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
4792   A = nullptr;
4793   B = nullptr;
4794   C = nullptr;
4795   D = nullptr;
4796   if (BO0 && BO0->getOpcode() == Instruction::Sub) {
4797     A = BO0->getOperand(0);
4798     B = BO0->getOperand(1);
4799   }
4800   if (BO1 && BO1->getOpcode() == Instruction::Sub) {
4801     C = BO1->getOperand(0);
4802     D = BO1->getOperand(1);
4803   }
4804 
4805   // icmp (A-B), A -> icmp 0, B for equalities or if there is no overflow.
4806   if (A == Op1 && NoOp0WrapProblem)
4807     return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
4808   // icmp C, (C-D) -> icmp D, 0 for equalities or if there is no overflow.
4809   if (C == Op0 && NoOp1WrapProblem)
4810     return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
4811 
4812   // Convert sub-with-unsigned-overflow comparisons into a comparison of args.
4813   // (A - B) u>/u<= A --> B u>/u<= A
4814   if (A == Op1 && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
4815     return new ICmpInst(Pred, B, A);
4816   // C u</u>= (C - D) --> C u</u>= D
4817   if (C == Op0 && (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE))
4818     return new ICmpInst(Pred, C, D);
4819   // (A - B) u>=/u< A --> B u>/u<= A  iff B != 0
4820   if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) &&
4821       isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
4822     return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A);
4823   // C u<=/u> (C - D) --> C u</u>= D  iff D != 0
4824   if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) &&
4825       isKnownNonZero(D, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
4826     return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D);
4827 
4828   // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow.
4829   if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem)
4830     return new ICmpInst(Pred, A, C);
4831 
4832   // icmp (A-B), (A-D) -> icmp D, B for equalities or if there is no overflow.
4833   if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem)
4834     return new ICmpInst(Pred, D, B);
4835 
4836   // icmp (0-X) s< cst --> icmp X s> -cst
4837   if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
4838     Value *X;
4839     if (match(BO0, m_Neg(m_Value(X))))
4840       if (Constant *RHSC = dyn_cast<Constant>(Op1))
4841         if (RHSC->isNotMinSignedValue())
4842           return new ICmpInst(I.getSwappedPredicate(), X,
4843                               ConstantExpr::getNeg(RHSC));
4844   }
4845 
4846   if (Instruction * R = foldICmpXorXX(I, Q, *this))
4847     return R;
4848   if (Instruction *R = foldICmpOrXX(I, Q, *this))
4849     return R;
4850 
4851   {
4852     // Try to remove shared multiplier from comparison:
4853     // X * Z u{lt/le/gt/ge}/eq/ne Y * Z
4854     Value *X, *Y, *Z;
4855     if (Pred == ICmpInst::getUnsignedPredicate(Pred) &&
4856         ((match(Op0, m_Mul(m_Value(X), m_Value(Z))) &&
4857           match(Op1, m_c_Mul(m_Specific(Z), m_Value(Y)))) ||
4858          (match(Op0, m_Mul(m_Value(Z), m_Value(X))) &&
4859           match(Op1, m_c_Mul(m_Specific(Z), m_Value(Y)))))) {
4860       bool NonZero;
4861       if (ICmpInst::isEquality(Pred)) {
4862         KnownBits ZKnown = computeKnownBits(Z, 0, &I);
4863         // if Z % 2 != 0
4864         //    X * Z eq/ne Y * Z -> X eq/ne Y
4865         if (ZKnown.countMaxTrailingZeros() == 0)
4866           return new ICmpInst(Pred, X, Y);
4867         NonZero = !ZKnown.One.isZero() ||
4868                   isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
4869         // if Z != 0 and nsw(X * Z) and nsw(Y * Z)
4870         //    X * Z eq/ne Y * Z -> X eq/ne Y
4871         if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() &&
4872             BO1->hasNoSignedWrap())
4873           return new ICmpInst(Pred, X, Y);
4874       } else
4875         NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
4876 
4877       // If Z != 0 and nuw(X * Z) and nuw(Y * Z)
4878       //    X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
4879       if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() &&
4880           BO1->hasNoUnsignedWrap())
4881         return new ICmpInst(Pred, X, Y);
4882     }
4883   }
4884 
4885   BinaryOperator *SRem = nullptr;
4886   // icmp (srem X, Y), Y
4887   if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
4888     SRem = BO0;
4889   // icmp Y, (srem X, Y)
4890   else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
4891            Op0 == BO1->getOperand(1))
4892     SRem = BO1;
4893   if (SRem) {
4894     // We don't check hasOneUse to avoid increasing register pressure because
4895     // the value we use is the same value this instruction was already using.
4896     switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
4897     default:
4898       break;
4899     case ICmpInst::ICMP_EQ:
4900       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4901     case ICmpInst::ICMP_NE:
4902       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4903     case ICmpInst::ICMP_SGT:
4904     case ICmpInst::ICMP_SGE:
4905       return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
4906                           Constant::getAllOnesValue(SRem->getType()));
4907     case ICmpInst::ICMP_SLT:
4908     case ICmpInst::ICMP_SLE:
4909       return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
4910                           Constant::getNullValue(SRem->getType()));
4911     }
4912   }
4913 
4914   if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() &&
4915       BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) {
4916     switch (BO0->getOpcode()) {
4917     default:
4918       break;
4919     case Instruction::Add:
4920     case Instruction::Sub:
4921     case Instruction::Xor: {
4922       if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
4923         return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4924 
4925       const APInt *C;
4926       if (match(BO0->getOperand(1), m_APInt(C))) {
4927         // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
4928         if (C->isSignMask()) {
4929           ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate();
4930           return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
4931         }
4932 
4933         // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
4934         if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
4935           ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate();
4936           NewPred = I.getSwappedPredicate(NewPred);
4937           return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
4938         }
4939       }
4940       break;
4941     }
4942     case Instruction::Mul: {
4943       if (!I.isEquality())
4944         break;
4945 
4946       const APInt *C;
4947       if (match(BO0->getOperand(1), m_APInt(C)) && !C->isZero() &&
4948           !C->isOne()) {
4949         // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
4950         // Mask = -1 >> count-trailing-zeros(C).
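             // e.g. i8, C == 12 (two trailing zeros): X * 12 == Y * 12 iff
             // 12 * (X - Y) == 0 (mod 256) iff X == Y (mod 64), i.e. the low
             // 6 bits of X and Y agree.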
4951         if (unsigned TZs = C->countr_zero()) {
4952           Constant *Mask = ConstantInt::get(
4953               BO0->getType(),
4954               APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
4955           Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask);
4956           Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
4957           return new ICmpInst(Pred, And1, And2);
4958         }
4959       }
4960       break;
4961     }
4962     case Instruction::UDiv:
4963     case Instruction::LShr:
4964       if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
4965         break;
4966       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4967 
4968     case Instruction::SDiv:
4969       if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
4970         break;
4971       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4972 
4973     case Instruction::AShr:
4974       if (!BO0->isExact() || !BO1->isExact())
4975         break;
4976       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4977 
4978     case Instruction::Shl: {
4979       bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
4980       bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
4981       if (!NUW && !NSW)
4982         break;
4983       if (!NSW && I.isSigned())
4984         break;
4985       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4986     }
4987     }
4988   }
4989 
4990   if (BO0) {
4991     // Transform: A & (L - 1) u< L --> L != 0
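         // (If L == 0 the compare becomes A u< 0, which is always false; if
         //  L != 0, then A & (L - 1) u<= L - 1 u< L.)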
4992     auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
4993     auto BitwiseAnd = m_c_And(m_Value(), LSubOne);
4994 
4995     if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
4996       auto *Zero = Constant::getNullValue(BO0->getType());
4997       return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
4998     }
4999   }
5000 
5001   // For unsigned predicates / eq / ne:
5002   // icmp pred (x << 1), x --> icmp getSignedPredicate(pred) x, 0
5003   // icmp pred x, (x << 1) --> icmp getSignedPredicate(pred) 0, x
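       // This works because (x << 1) wraps exactly when the sign bit of x is
       // set; e.g. (x << 1) u< x holds iff x s< 0.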
5004   if (!ICmpInst::isSigned(Pred)) {
5005     if (match(Op0, m_Shl(m_Specific(Op1), m_One())))
5006       return new ICmpInst(ICmpInst::getSignedPredicate(Pred), Op1,
5007                           Constant::getNullValue(Op1->getType()));
5008     else if (match(Op1, m_Shl(m_Specific(Op0), m_One())))
5009       return new ICmpInst(ICmpInst::getSignedPredicate(Pred),
5010                           Constant::getNullValue(Op0->getType()), Op0);
5011   }
5012 
5013   if (Value *V = foldMultiplicationOverflowCheck(I))
5014     return replaceInstUsesWith(I, V);
5015 
5016   if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
5017     return replaceInstUsesWith(I, V);
5018 
5019   if (Instruction *R = foldICmpAndXX(I, Q, *this))
5020     return R;
5021 
5022   if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
5023     return replaceInstUsesWith(I, V);
5024 
5025   if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
5026     return replaceInstUsesWith(I, V);
5027 
5028   return nullptr;
5029 }
5030 
5031 /// Fold icmp Pred min|max(X, Y), Z.
5032 Instruction *
5033 InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I,
5034                                          MinMaxIntrinsic *MinMax, Value *Z,
5035                                          ICmpInst::Predicate Pred) {
5036   Value *X = MinMax->getLHS();
5037   Value *Y = MinMax->getRHS();
5038   if (ICmpInst::isSigned(Pred) && !MinMax->isSigned())
5039     return nullptr;
5040   if (ICmpInst::isUnsigned(Pred) && MinMax->isSigned())
5041     return nullptr;
5042   SimplifyQuery Q = SQ.getWithInstruction(&I);
5043   auto IsCondKnownTrue = [](Value *Val) -> std::optional<bool> {
5044     if (!Val)
5045       return std::nullopt;
5046     if (match(Val, m_One()))
5047       return true;
5048     if (match(Val, m_Zero()))
5049       return false;
5050     return std::nullopt;
5051   };
5052   auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(Pred, X, Z, Q));
5053   auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(Pred, Y, Z, Q));
5054   if (!CmpXZ.has_value() && !CmpYZ.has_value())
5055     return nullptr;
5056   if (!CmpXZ.has_value()) {
5057     std::swap(X, Y);
5058     std::swap(CmpXZ, CmpYZ);
5059   }
5060 
5061   auto FoldIntoCmpYZ = [&]() -> Instruction * {
5062     if (CmpYZ.has_value())
5063       return replaceInstUsesWith(I, ConstantInt::getBool(I.getType(), *CmpYZ));
5064     return ICmpInst::Create(Instruction::ICmp, Pred, Y, Z);
5065   };
5066 
5067   switch (Pred) {
5068   case ICmpInst::ICMP_EQ:
5069   case ICmpInst::ICMP_NE: {
5070     // If X == Z:
5071     //     Expr       Result
5072     // min(X, Y) == Z X <= Y
5073     // max(X, Y) == Z X >= Y
5074     // min(X, Y) != Z X > Y
5075     // max(X, Y) != Z X < Y
5076     if ((Pred == ICmpInst::ICMP_EQ) == *CmpXZ) {
5077       ICmpInst::Predicate NewPred =
5078           ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
5079       if (Pred == ICmpInst::ICMP_NE)
5080         NewPred = ICmpInst::getInversePredicate(NewPred);
5081       return ICmpInst::Create(Instruction::ICmp, NewPred, X, Y);
5082     }
5083     // Otherwise (X != Z):
5084     ICmpInst::Predicate NewPred = MinMax->getPredicate();
5085     auto MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q));
5086     if (!MinMaxCmpXZ.has_value()) {
5087       std::swap(X, Y);
5088       std::swap(CmpXZ, CmpYZ);
5089       // Re-check pre-condition X != Z
5090       if (!CmpXZ.has_value() || (Pred == ICmpInst::ICMP_EQ) == *CmpXZ)
5091         break;
5092       MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q));
5093     }
5094     if (!MinMaxCmpXZ.has_value())
5095       break;
5096     if (*MinMaxCmpXZ) {
5097       //    Expr         Fact    Result
5098       // min(X, Y) == Z  X < Z   false
5099       // max(X, Y) == Z  X > Z   false
5100       // min(X, Y) != Z  X < Z    true
5101       // max(X, Y) != Z  X > Z    true
5102       return replaceInstUsesWith(
5103           I, ConstantInt::getBool(I.getType(), Pred == ICmpInst::ICMP_NE));
5104     } else {
5105       //    Expr         Fact    Result
5106       // min(X, Y) == Z  X > Z   Y == Z
5107       // max(X, Y) == Z  X < Z   Y == Z
5108       // min(X, Y) != Z  X > Z   Y != Z
5109       // max(X, Y) != Z  X < Z   Y != Z
5110       return FoldIntoCmpYZ();
5111     }
5112     break;
5113   }
5114   case ICmpInst::ICMP_SLT:
5115   case ICmpInst::ICMP_ULT:
5116   case ICmpInst::ICMP_SLE:
5117   case ICmpInst::ICMP_ULE:
5118   case ICmpInst::ICMP_SGT:
5119   case ICmpInst::ICMP_UGT:
5120   case ICmpInst::ICMP_SGE:
5121   case ICmpInst::ICMP_UGE: {
5122     bool IsSame = MinMax->getPredicate() == ICmpInst::getStrictPredicate(Pred);
5123     if (*CmpXZ) {
5124       if (IsSame) {
5125         //      Expr        Fact    Result
5126         // min(X, Y) < Z    X < Z   true
5127         // min(X, Y) <= Z   X <= Z  true
5128         // max(X, Y) > Z    X > Z   true
5129         // max(X, Y) >= Z   X >= Z  true
5130         return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
5131       } else {
5132         //      Expr        Fact    Result
5133         // max(X, Y) < Z    X < Z   Y < Z
5134         // max(X, Y) <= Z   X <= Z  Y <= Z
5135         // min(X, Y) > Z    X > Z   Y > Z
5136         // min(X, Y) >= Z   X >= Z  Y >= Z
5137         return FoldIntoCmpYZ();
5138       }
5139     } else {
5140       if (IsSame) {
5141         //      Expr        Fact    Result
5142         // min(X, Y) < Z    X >= Z  Y < Z
5143         // min(X, Y) <= Z   X > Z   Y <= Z
5144         // max(X, Y) > Z    X <= Z  Y > Z
5145         // max(X, Y) >= Z   X < Z   Y >= Z
5146         return FoldIntoCmpYZ();
5147       } else {
5148         //      Expr        Fact    Result
5149         // max(X, Y) < Z    X >= Z  false
5150         // max(X, Y) <= Z   X > Z   false
5151         // min(X, Y) > Z    X <= Z  false
5152         // min(X, Y) >= Z   X < Z   false
5153         return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
5154       }
5155     }
5156     break;
5157   }
5158   default:
5159     break;
5160   }
5161 
5162   return nullptr;
5163 }
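// As a concrete instance of the tables above (a sketch; names are
// illustrative): if simplifyICmpInst can prove X <s Z, then
//   icmp slt (smax X, Y), Z --> icmp slt Y, Z
// whereas if it can prove X >=s Z, the whole compare folds to false.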
5164 Instruction *InstCombinerImpl::foldICmpWithMinMax(ICmpInst &Cmp) {
5165   ICmpInst::Predicate Pred = Cmp.getPredicate();
5166   Value *Lhs = Cmp.getOperand(0);
5167   Value *Rhs = Cmp.getOperand(1);
5168 
5169   if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Lhs)) {
5170     if (Instruction *Res = foldICmpWithMinMaxImpl(Cmp, MinMax, Rhs, Pred))
5171       return Res;
5172   }
5173 
5174   if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Rhs)) {
5175     if (Instruction *Res = foldICmpWithMinMaxImpl(
5176             Cmp, MinMax, Lhs, ICmpInst::getSwappedPredicate(Pred)))
5177       return Res;
5178   }
5179 
5180   return nullptr;
5181 }
5182 
5183 // Canonicalize checking for a power-of-2-or-zero value:
5184 static Instruction *foldICmpPow2Test(ICmpInst &I,
5185                                      InstCombiner::BuilderTy &Builder) {
5186   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
5187   const CmpInst::Predicate Pred = I.getPredicate();
5188   Value *A = nullptr;
5189   bool CheckIs;
5190   if (I.isEquality()) {
5191     // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
5192     // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants)
5193     if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()),
5194                                      m_Deferred(A)))) ||
5195         !match(Op1, m_ZeroInt()))
5196       A = nullptr;
5197 
5198     // (A & -A) == A --> ctpop(A) < 2 (four commuted variants)
5199     // (-A & A) != A --> ctpop(A) > 1 (four commuted variants)
5200     if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1)))))
5201       A = Op1;
5202     else if (match(Op1,
5203                    m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0)))))
5204       A = Op0;
5205 
5206     CheckIs = Pred == ICmpInst::ICMP_EQ;
5207   } else if (ICmpInst::isUnsigned(Pred)) {
5208     // (A ^ (A-1)) u>= A --> ctpop(A) < 2 (two commuted variants)
5209     // ((A-1) ^ A) u< A --> ctpop(A) > 1 (two commuted variants)
5210 
5211     if ((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) &&
5212         match(Op0, m_OneUse(m_c_Xor(m_Add(m_Specific(Op1), m_AllOnes()),
5213                                     m_Specific(Op1))))) {
5214       A = Op1;
5215       CheckIs = Pred == ICmpInst::ICMP_UGE;
5216     } else if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) &&
5217                match(Op1, m_OneUse(m_c_Xor(m_Add(m_Specific(Op0), m_AllOnes()),
5218                                            m_Specific(Op0))))) {
5219       A = Op0;
5220       CheckIs = Pred == ICmpInst::ICMP_ULE;
5221     }
5222   }
5223 
5224   if (A) {
5225     Type *Ty = A->getType();
5226     CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
5227     return CheckIs ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop,
5228                                   ConstantInt::get(Ty, 2))
5229                    : new ICmpInst(ICmpInst::ICMP_UGT, CtPop,
5230                                   ConstantInt::get(Ty, 1));
5231   }
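  // A sketch of the equality form (value names are hypothetical):
  //   %m   = add i32 %a, -1
  //   %t   = and i32 %m, %a
  //   %cmp = icmp eq i32 %t, 0
  // becomes
  //   %pop = call i32 @llvm.ctpop.i32(i32 %a)
  //   %cmp = icmp ult i32 %pop, 2
  // i.e. "%a is a power of two or zero" turns into "popcount(%a) < 2".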
5232 
5233   return nullptr;
5234 }
5235 
5236 Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
5237   if (!I.isEquality())
5238     return nullptr;
5239 
5240   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
5241   const CmpInst::Predicate Pred = I.getPredicate();
5242   Value *A, *B, *C, *D;
5243   if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
5244     if (A == Op1 || B == Op1) { // (A^B) == A  ->  B == 0
5245       Value *OtherVal = A == Op1 ? B : A;
5246       return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
5247     }
5248 
5249     if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
5250       // A^c1 == C^c2 --> A == C^(c1^c2)
5251       ConstantInt *C1, *C2;
5252       if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
5253           Op1->hasOneUse()) {
5254         Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue());
5255         Value *Xor = Builder.CreateXor(C, NC);
5256         return new ICmpInst(Pred, A, Xor);
5257       }
5258 
5259       // A^B == A^D -> B == D
5260       if (A == C)
5261         return new ICmpInst(Pred, B, D);
5262       if (A == D)
5263         return new ICmpInst(Pred, B, C);
5264       if (B == C)
5265         return new ICmpInst(Pred, A, D);
5266       if (B == D)
5267         return new ICmpInst(Pred, A, C);
5268     }
5269   }
5270 
5271   // canonicalize:
5272   // (icmp eq/ne (and X, C), X)
5273   //    -> (icmp eq/ne (and X, ~C), 0)
5274   {
5275     Constant *CMask;
5276     A = nullptr;
5277     if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_ImmConstant(CMask)))))
5278       A = Op1;
5279     else if (match(Op1, m_OneUse(m_And(m_Specific(Op0), m_ImmConstant(CMask)))))
5280       A = Op0;
5281     if (A)
5282       return new ICmpInst(Pred, Builder.CreateAnd(A, Builder.CreateNot(CMask)),
5283                           Constant::getNullValue(A->getType()));
5284   }
5285 
5286   if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
5287     // A == (A^B)  ->  B == 0
5288     Value *OtherVal = A == Op0 ? B : A;
5289     return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
5290   }
5291 
5292   // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
5293   if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
5294       match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
5295     Value *X = nullptr, *Y = nullptr, *Z = nullptr;
5296 
5297     if (A == C) {
5298       X = B;
5299       Y = D;
5300       Z = A;
5301     } else if (A == D) {
5302       X = B;
5303       Y = C;
5304       Z = A;
5305     } else if (B == C) {
5306       X = A;
5307       Y = D;
5308       Z = B;
5309     } else if (B == D) {
5310       X = A;
5311       Y = C;
5312       Z = B;
5313     }
5314 
5315     if (X) { // Build (X^Y) & Z
5316       Op1 = Builder.CreateXor(X, Y);
5317       Op1 = Builder.CreateAnd(Op1, Z);
5318       return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
5319     }
5320   }
5321 
5322   {
5323     // Similar to above, but specialized for a constant because an invert is needed:
5324     // (X | C) == (Y | C) --> (X ^ Y) & ~C == 0
5325     Value *X, *Y;
5326     Constant *C;
5327     if (match(Op0, m_OneUse(m_Or(m_Value(X), m_Constant(C)))) &&
5328         match(Op1, m_OneUse(m_Or(m_Value(Y), m_Specific(C))))) {
5329       Value *Xor = Builder.CreateXor(X, Y);
5330       Value *And = Builder.CreateAnd(Xor, ConstantExpr::getNot(C));
5331       return new ICmpInst(Pred, And, Constant::getNullValue(And->getType()));
5332     }
5333   }
5334 
5335   if (match(Op1, m_ZExt(m_Value(A))) &&
5336       (Op0->hasOneUse() || Op1->hasOneUse())) {
5337     // (B & (Pow2C-1)) == zext A --> A == trunc B
5338     // (B & (Pow2C-1)) != zext A --> A != trunc B
5339     const APInt *MaskC;
5340     if (match(Op0, m_And(m_Value(B), m_LowBitMask(MaskC))) &&
5341         MaskC->countr_one() == A->getType()->getScalarSizeInBits())
5342       return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
5343   }
5344 
5345   // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
5346   // For lshr and ashr pairs.
5347   const APInt *AP1, *AP2;
5348   if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
5349        match(Op1, m_OneUse(m_LShr(m_Value(B), m_APIntAllowUndef(AP2))))) ||
5350       (match(Op0, m_OneUse(m_AShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
5351        match(Op1, m_OneUse(m_AShr(m_Value(B), m_APIntAllowUndef(AP2)))))) {
5352     if (AP1 != AP2)
5353       return nullptr;
5354     unsigned TypeBits = AP1->getBitWidth();
5355     unsigned ShAmt = AP1->getLimitedValue(TypeBits);
5356     if (ShAmt < TypeBits && ShAmt != 0) {
5357       ICmpInst::Predicate NewPred =
5358           Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
5359       Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
5360       APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
5361       return new ICmpInst(NewPred, Xor, ConstantInt::get(A->getType(), CmpVal));
5362     }
5363   }
5364 
5365   // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
5366   ConstantInt *Cst1;
5367   if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
5368       match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
5369     unsigned TypeBits = Cst1->getBitWidth();
5370     unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
5371     if (ShAmt < TypeBits && ShAmt != 0) {
5372       Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
5373       APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
5374       Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal),
5375                                       I.getName() + ".mask");
5376       return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
5377     }
5378   }
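  // A sketch with C == 3 on i8 (names are hypothetical):
  //   %x   = shl i8 %a, 3
  //   %y   = shl i8 %b, 3
  //   %cmp = icmp eq i8 %x, %y
  // becomes
  //   %xor = xor i8 %a, %b
  //   %msk = and i8 %xor, 31   ; only the low 8 - 3 = 5 bits survive the shift
  //   %cmp = icmp eq i8 %msk, 0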
5379 
5380   // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
5381   // "icmp (and X, mask), cst"
5382   uint64_t ShAmt = 0;
5383   if (Op0->hasOneUse() &&
5384       match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) &&
5385       match(Op1, m_ConstantInt(Cst1)) &&
5386       // Only do this when A has multiple uses.  This is most important to do
5387       // when it exposes other optimizations.
5388       !A->hasOneUse()) {
5389     unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
5390 
5391     if (ShAmt < ASize) {
5392       APInt MaskV =
5393           APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
5394       MaskV <<= ShAmt;
5395 
5396       APInt CmpV = Cst1->getValue().zext(ASize);
5397       CmpV <<= ShAmt;
5398 
5399       Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV));
5400       return new ICmpInst(Pred, Mask, Builder.getInt(CmpV));
5401     }
5402   }
5403 
5404   if (Instruction *ICmp = foldICmpIntrinsicWithIntrinsic(I, Builder))
5405     return ICmp;
5406 
5407   // Match icmp eq (ashr (trunc A), BW-1), (trunc (lshr A, BW)), where A has
5408   // width 2*BW: this checks that the top BW+1 bits of A are all the same.
5409   // Create "A >=s INT_MIN && A <=s INT_MAX", which we generate as
5410   // "icmp ult (add A, 2^(BW-1)), 2^BW" to skip a few steps of instcombine.
5411   unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
5412   if (match(Op0, m_AShr(m_Trunc(m_Value(A)), m_SpecificInt(BitWidth - 1))) &&
5413       match(Op1, m_Trunc(m_LShr(m_Specific(A), m_SpecificInt(BitWidth)))) &&
5414       A->getType()->getScalarSizeInBits() == BitWidth * 2 &&
5415       (I.getOperand(0)->hasOneUse() || I.getOperand(1)->hasOneUse())) {
5416     APInt C = APInt::getOneBitSet(BitWidth * 2, BitWidth - 1);
5417     Value *Add = Builder.CreateAdd(A, ConstantInt::get(A->getType(), C));
5418     return new ICmpInst(Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT
5419                                                   : ICmpInst::ICMP_UGE,
5420                         Add, ConstantInt::get(A->getType(), C.shl(1)));
5421   }
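  // A sketch with BW == 16 and %a of type i32 (names are hypothetical):
  //   %lo  = trunc i32 %a to i16
  //   %sgn = ashr i16 %lo, 15
  //   %sh  = lshr i32 %a, 16
  //   %hi  = trunc i32 %sh to i16
  //   %cmp = icmp eq i16 %sgn, %hi
  // becomes
  //   %add = add i32 %a, 32768        ; 2^(BW-1)
  //   %cmp = icmp ult i32 %add, 65536 ; 2^BW
  // i.e. "%a fits in a signed i16".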
5422 
5423   // Canonicalize:
5424   // Assume B_Pow2 != 0
5425   // 1. A & B_Pow2 != B_Pow2 -> A & B_Pow2 == 0
5426   // 2. A & B_Pow2 == B_Pow2 -> A & B_Pow2 != 0
5427   if (match(Op0, m_c_And(m_Specific(Op1), m_Value())) &&
5428       isKnownToBeAPowerOfTwo(Op1, /* OrZero */ false, 0, &I))
5429     return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0,
5430                         ConstantInt::getNullValue(Op0->getType()));
5431 
5432   if (match(Op1, m_c_And(m_Specific(Op0), m_Value())) &&
5433       isKnownToBeAPowerOfTwo(Op0, /* OrZero */ false, 0, &I))
5434     return new ICmpInst(CmpInst::getInversePredicate(Pred), Op1,
5435                         ConstantInt::getNullValue(Op1->getType()));
5436 
5437   // Canonicalize:
5438   // icmp eq/ne X, OneUse(rotate-right(X))
5439   //    -> icmp eq/ne X, rotate-left(X)
5440   // We generally try to convert rotate-right -> rotate-left; this just
5441   // canonicalizes another case.
5442   CmpInst::Predicate PredUnused = Pred;
5443   if (match(&I, m_c_ICmp(PredUnused, m_Value(A),
5444                          m_OneUse(m_Intrinsic<Intrinsic::fshr>(
5445                              m_Deferred(A), m_Deferred(A), m_Value(B))))))
5446     return new ICmpInst(
5447         Pred, A,
5448         Builder.CreateIntrinsic(Op0->getType(), Intrinsic::fshl, {A, A, B}));
5449 
5450   // Canonicalize:
5451   // icmp eq/ne OneUse(A ^ Cst), B --> icmp eq/ne (A ^ B), Cst
5452   Constant *Cst;
5453   if (match(&I, m_c_ICmp(PredUnused,
5454                          m_OneUse(m_Xor(m_Value(A), m_ImmConstant(Cst))),
5455                          m_CombineAnd(m_Value(B), m_Unless(m_ImmConstant())))))
5456     return new ICmpInst(Pred, Builder.CreateXor(A, B), Cst);
5457 
5458   {
5459     // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2)
5460     auto m_Matcher =
5461         m_CombineOr(m_CombineOr(m_c_Add(m_Value(B), m_Deferred(A)),
5462                                 m_c_Xor(m_Value(B), m_Deferred(A))),
5463                     m_Sub(m_Value(B), m_Deferred(A)));
5464     std::optional<bool> IsZero = std::nullopt;
5465     if (match(&I, m_c_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)),
5466                            m_Deferred(A))))
5467       IsZero = false;
5468     // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0)
5469     else if (match(&I,
5470                    m_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)),
5471                           m_Zero())))
5472       IsZero = true;
5473 
5474     if (IsZero && isKnownToBeAPowerOfTwo(A, /* OrZero */ true, /*Depth*/ 0, &I))
5475       // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2)
5476       //    -> (icmp eq/ne (and X, P2), 0)
5477       // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0)
5478       //    -> (icmp eq/ne (and X, P2), P2)
5479       return new ICmpInst(Pred, Builder.CreateAnd(B, A),
5480                           *IsZero ? A
5481                                   : ConstantInt::getNullValue(A->getType()));
5482   }
5483 
5484   return nullptr;
5485 }
5486 
5487 Instruction *InstCombinerImpl::foldICmpWithTrunc(ICmpInst &ICmp) {
5488   ICmpInst::Predicate Pred = ICmp.getPredicate();
5489   Value *Op0 = ICmp.getOperand(0), *Op1 = ICmp.getOperand(1);
5490 
5491   // Try to canonicalize trunc + compare-to-constant into a mask + cmp.
5492   // The trunc masks high bits while the compare may effectively mask low bits.
5493   Value *X;
5494   const APInt *C;
5495   if (!match(Op0, m_OneUse(m_Trunc(m_Value(X)))) || !match(Op1, m_APInt(C)))
5496     return nullptr;
5497 
5498   // This matches patterns corresponding to tests of the signbit as well as:
5499   // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
5500   // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
5501   APInt Mask;
5502   if (decomposeBitTestICmp(Op0, Op1, Pred, X, Mask, true /* WithTrunc */)) {
5503     Value *And = Builder.CreateAnd(X, Mask);
5504     Constant *Zero = ConstantInt::getNullValue(X->getType());
5505     return new ICmpInst(Pred, And, Zero);
5506   }
5507 
5508   unsigned SrcBits = X->getType()->getScalarSizeInBits();
5509   if (Pred == ICmpInst::ICMP_ULT && C->isNegatedPowerOf2()) {
5510     // If C is a negative power-of-2 (high-bit mask):
5511     // (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?)
5512     Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits));
5513     Value *And = Builder.CreateAnd(X, MaskC);
5514     return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC);
5515   }
5516 
5517   if (Pred == ICmpInst::ICMP_UGT && (~*C).isPowerOf2()) {
5518     // If ~C is a power-of-2 (i.e. C has exactly one clear bit):
5519     // (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?)
5520     Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits));
5521     Value *And = Builder.CreateAnd(X, MaskC);
5522     return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
5523   }
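  // A sketch of the fold above with C == 251 (0xFB), so ~C == 4 is a power
  // of two (names are hypothetical):
  //   %t   = trunc i32 %x to i8
  //   %cmp = icmp ugt i8 %t, 251
  // becomes
  //   %and = and i32 %x, 252   ; C+1, zero-extended to the source type
  //   %cmp = icmp eq i32 %and, 252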
5524 
5525   if (auto *II = dyn_cast<IntrinsicInst>(X)) {
5526     if (II->getIntrinsicID() == Intrinsic::cttz ||
5527         II->getIntrinsicID() == Intrinsic::ctlz) {
5528       unsigned MaxRet = SrcBits;
5529       // If the "is_zero_poison" argument is set, then we know at least
5530       // one bit is set in the input, so the result is at most one less
5531       // than the full bitwidth of that input.
5532       if (match(II->getArgOperand(1), m_One()))
5533         MaxRet--;
5534 
5535       // Make sure the destination is wide enough to hold the largest output of
5536       // the intrinsic.
5537       if (llvm::Log2_32(MaxRet) + 1 <= Op0->getType()->getScalarSizeInBits())
5538         if (Instruction *I =
5539                 foldICmpIntrinsicWithConstant(ICmp, II, C->zext(SrcBits)))
5540           return I;
5541     }
5542   }
5543 
5544   return nullptr;
5545 }
5546 
5547 Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
5548   assert(isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0");
5549   auto *CastOp0 = cast<CastInst>(ICmp.getOperand(0));
5550   Value *X;
5551   if (!match(CastOp0, m_ZExtOrSExt(m_Value(X))))
5552     return nullptr;
5553 
5554   bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt;
5555   bool IsSignedCmp = ICmp.isSigned();
5556 
5557   // icmp Pred (ext X), (ext Y)
5558   Value *Y;
5559   if (match(ICmp.getOperand(1), m_ZExtOrSExt(m_Value(Y)))) {
5560     bool IsZext0 = isa<ZExtInst>(ICmp.getOperand(0));
5561     bool IsZext1 = isa<ZExtInst>(ICmp.getOperand(1));
5562 
5563     if (IsZext0 != IsZext1) {
5564       // If X and Y are both i1:
5565       // (icmp eq/ne (zext X), (sext Y))
5566       //      eq -> (icmp eq (or X, Y), 0)
5567       //      ne -> (icmp ne (or X, Y), 0)
5568       if (ICmp.isEquality() && X->getType()->isIntOrIntVectorTy(1) &&
5569           Y->getType()->isIntOrIntVectorTy(1))
5570         return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y),
5571                             Constant::getNullValue(X->getType()));
5572 
5573       // If we have mismatched casts and zext has the nneg flag, we can
5574       //  treat the "zext nneg" as "sext". Otherwise, we cannot fold and quit.
5575 
5576       auto *NonNegInst0 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(0));
5577       auto *NonNegInst1 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(1));
5578 
5579       bool IsNonNeg0 = NonNegInst0 && NonNegInst0->hasNonNeg();
5580       bool IsNonNeg1 = NonNegInst1 && NonNegInst1->hasNonNeg();
5581 
5582       if ((IsZext0 && IsNonNeg0) || (IsZext1 && IsNonNeg1))
5583         IsSignedExt = true;
5584       else
5585         return nullptr;
5586     }
5587 
5588     // Not an extension from the same type?
5589     Type *XTy = X->getType(), *YTy = Y->getType();
5590     if (XTy != YTy) {
5591       // One of the casts must have one use because we are creating a new cast.
5592       if (!ICmp.getOperand(0)->hasOneUse() && !ICmp.getOperand(1)->hasOneUse())
5593         return nullptr;
5594       // Extend the narrower operand to the type of the wider operand.
5595       CastInst::CastOps CastOpcode =
5596           IsSignedExt ? Instruction::SExt : Instruction::ZExt;
5597       if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits())
5598         X = Builder.CreateCast(CastOpcode, X, YTy);
5599       else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits())
5600         Y = Builder.CreateCast(CastOpcode, Y, XTy);
5601       else
5602         return nullptr;
5603     }
5604 
5605     // (zext X) == (zext Y) --> X == Y
5606     // (sext X) == (sext Y) --> X == Y
5607     if (ICmp.isEquality())
5608       return new ICmpInst(ICmp.getPredicate(), X, Y);
5609 
5610     // A signed comparison of sign extended values simplifies into a
5611     // signed comparison.
5612     if (IsSignedCmp && IsSignedExt)
5613       return new ICmpInst(ICmp.getPredicate(), X, Y);
5614 
5615     // The other three cases all fold into an unsigned comparison.
5616     return new ICmpInst(ICmp.getUnsignedPredicate(), X, Y);
5617   }
5618 
5619   // Below here, we are only folding a compare with a constant.
5620   auto *C = dyn_cast<Constant>(ICmp.getOperand(1));
5621   if (!C)
5622     return nullptr;
5623 
5624   // If a lossless truncate is possible...
5625   Type *SrcTy = CastOp0->getSrcTy();
5626   Constant *Res = getLosslessTrunc(C, SrcTy, CastOp0->getOpcode());
5627   if (Res) {
5628     if (ICmp.isEquality())
5629       return new ICmpInst(ICmp.getPredicate(), X, Res);
5630 
5631     // A signed comparison of sign extended values simplifies into a
5632     // signed comparison.
5633     if (IsSignedExt && IsSignedCmp)
5634       return new ICmpInst(ICmp.getPredicate(), X, Res);
5635 
5636     // The other three cases all fold into an unsigned comparison.
5637     return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res);
5638   }
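  // A sketch of the lossless case (names are hypothetical):
  //   %z   = zext i8 %x to i32
  //   %cmp = icmp eq i32 %z, 5
  // becomes
  //   %cmp = icmp eq i8 %x, 5
  // since 5 round-trips through the i8 type unchanged.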
5639 
5640   // The re-extended constant changed, partly changed (in the case of a vector),
5641   // or could not be determined to be equal (in the case of a constant
5642   // expression), so the constant cannot be represented in the shorter type.
5643   // All the cases that fold to true or false will have already been handled
5644   // by simplifyICmpInst, so only deal with the tricky case.
5645   if (IsSignedCmp || !IsSignedExt || !isa<ConstantInt>(C))
5646     return nullptr;
5647 
5648   // Is source op positive?
5649   // icmp ult (sext X), C --> icmp sgt X, -1
5650   if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
5651     return new ICmpInst(CmpInst::ICMP_SGT, X, Constant::getAllOnesValue(SrcTy));
5652 
5653   // Is source op negative?
5654   // icmp ugt (sext X), C --> icmp slt X, 0
5655   assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
5656   return new ICmpInst(CmpInst::ICMP_SLT, X, Constant::getNullValue(SrcTy));
5657 }
5658 
5659 /// Handle icmp (cast x), (cast or constant).
5660 Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
5661   // If any operand of ICmp is an inttoptr round-trip cast, then remove it,
5662   // since icmp compares only the pointer's value.
5663   // icmp (inttoptr (ptrtoint p1)), p2 --> icmp p1, p2.
5664   Value *SimplifiedOp0 = simplifyIntToPtrRoundTripCast(ICmp.getOperand(0));
5665   Value *SimplifiedOp1 = simplifyIntToPtrRoundTripCast(ICmp.getOperand(1));
5666   if (SimplifiedOp0 || SimplifiedOp1)
5667     return new ICmpInst(ICmp.getPredicate(),
5668                         SimplifiedOp0 ? SimplifiedOp0 : ICmp.getOperand(0),
5669                         SimplifiedOp1 ? SimplifiedOp1 : ICmp.getOperand(1));
5670 
5671   auto *CastOp0 = dyn_cast<CastInst>(ICmp.getOperand(0));
5672   if (!CastOp0)
5673     return nullptr;
5674   if (!isa<Constant>(ICmp.getOperand(1)) && !isa<CastInst>(ICmp.getOperand(1)))
5675     return nullptr;
5676 
5677   Value *Op0Src = CastOp0->getOperand(0);
5678   Type *SrcTy = CastOp0->getSrcTy();
5679   Type *DestTy = CastOp0->getDestTy();
5680 
5681   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
5682   // integer type is the same size as the pointer type.
5683   auto CompatibleSizes = [&](Type *SrcTy, Type *DestTy) {
5684     if (isa<VectorType>(SrcTy)) {
5685       SrcTy = cast<VectorType>(SrcTy)->getElementType();
5686       DestTy = cast<VectorType>(DestTy)->getElementType();
5687     }
5688     return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth();
5689   };
5690   if (CastOp0->getOpcode() == Instruction::PtrToInt &&
5691       CompatibleSizes(SrcTy, DestTy)) {
5692     Value *NewOp1 = nullptr;
5693     if (auto *PtrToIntOp1 = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
5694       Value *PtrSrc = PtrToIntOp1->getOperand(0);
5695       if (PtrSrc->getType() == Op0Src->getType())
5696         NewOp1 = PtrToIntOp1->getOperand(0);
5697     } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
5698       NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy);
5699     }
5700 
5701     if (NewOp1)
5702       return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1);
5703   }
5704 
5705   if (Instruction *R = foldICmpWithTrunc(ICmp))
5706     return R;
5707 
5708   return foldICmpWithZextOrSext(ICmp);
5709 }
5710 
5711 static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS, bool IsSigned) {
5712   switch (BinaryOp) {
5713     default:
5714       llvm_unreachable("Unsupported binary op");
5715     case Instruction::Add:
5716     case Instruction::Sub:
5717       return match(RHS, m_Zero());
5718     case Instruction::Mul:
5719       return !(RHS->getType()->isIntOrIntVectorTy(1) && IsSigned) &&
5720              match(RHS, m_One());
5721   }
5722 }
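// For example (a sketch), an overflow check against a neutral RHS folds to
// the LHS plus a known-false overflow bit:
//   @llvm.uadd.with.overflow.i32(i32 %x, i32 0) --> { i32 %x, i1 false }
// The i1 mul special case exists because a signed i1 "1" is really -1.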
5723 
5724 OverflowResult
5725 InstCombinerImpl::computeOverflow(Instruction::BinaryOps BinaryOp,
5726                                   bool IsSigned, Value *LHS, Value *RHS,
5727                                   Instruction *CxtI) const {
5728   switch (BinaryOp) {
5729     default:
5730       llvm_unreachable("Unsupported binary op");
5731     case Instruction::Add:
5732       if (IsSigned)
5733         return computeOverflowForSignedAdd(LHS, RHS, CxtI);
5734       else
5735         return computeOverflowForUnsignedAdd(LHS, RHS, CxtI);
5736     case Instruction::Sub:
5737       if (IsSigned)
5738         return computeOverflowForSignedSub(LHS, RHS, CxtI);
5739       else
5740         return computeOverflowForUnsignedSub(LHS, RHS, CxtI);
5741     case Instruction::Mul:
5742       if (IsSigned)
5743         return computeOverflowForSignedMul(LHS, RHS, CxtI);
5744       else
5745         return computeOverflowForUnsignedMul(LHS, RHS, CxtI);
5746   }
5747 }
5748 
5749 bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp,
5750                                              bool IsSigned, Value *LHS,
5751                                              Value *RHS, Instruction &OrigI,
5752                                              Value *&Result,
5753                                              Constant *&Overflow) {
5754   if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
5755     std::swap(LHS, RHS);
5756 
5757   // If the overflow check was an add followed by a compare, the insertion point
5758   // may be pointing to the compare.  We want to insert the new instructions
5759   // before the add in case there are uses of the add between the add and the
5760   // compare.
5761   Builder.SetInsertPoint(&OrigI);
5762 
5763   Type *OverflowTy = Type::getInt1Ty(LHS->getContext());
5764   if (auto *LHSTy = dyn_cast<VectorType>(LHS->getType()))
5765     OverflowTy = VectorType::get(OverflowTy, LHSTy->getElementCount());
5766 
5767   if (isNeutralValue(BinaryOp, RHS, IsSigned)) {
5768     Result = LHS;
5769     Overflow = ConstantInt::getFalse(OverflowTy);
5770     return true;
5771   }
5772 
5773   switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) {
5774     case OverflowResult::MayOverflow:
5775       return false;
5776     case OverflowResult::AlwaysOverflowsLow:
5777     case OverflowResult::AlwaysOverflowsHigh:
5778       Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
5779       Result->takeName(&OrigI);
5780       Overflow = ConstantInt::getTrue(OverflowTy);
5781       return true;
5782     case OverflowResult::NeverOverflows:
5783       Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
5784       Result->takeName(&OrigI);
5785       Overflow = ConstantInt::getFalse(OverflowTy);
5786       if (auto *Inst = dyn_cast<Instruction>(Result)) {
5787         if (IsSigned)
5788           Inst->setHasNoSignedWrap();
5789         else
5790           Inst->setHasNoUnsignedWrap();
5791       }
5792       return true;
5793   }
5794 
5795   llvm_unreachable("Unexpected overflow result");
5796 }
5797 
5798 /// Recognize and process idiom involving test for multiplication
5799 /// overflow.
5800 ///
5801 /// The caller has matched a pattern of the form:
5802 ///   I = icmp ugt/ult (mul (zext A, zext B)), V
5803 /// The function checks if this is a test for overflow and, if so, replaces
5804 /// the multiplication with a call to the 'umul.with.overflow' intrinsic.
5805 ///
5806 /// \param I Compare instruction.
5807 /// \param MulVal Result of 'mul' instruction.  It is one of the arguments of
5808 ///               the compare instruction.  Must be of integer type.
5809 /// \param OtherVal The other argument of compare instruction.
5810 /// \returns Instruction which must replace the compare instruction, NULL if no
5811 ///          replacement required.
5812 static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
5813                                          const APInt *OtherVal,
5814                                          InstCombinerImpl &IC) {
5815   // Don't bother doing this transformation for pointers, and don't do it
5816   // for vectors.
5817   if (!isa<IntegerType>(MulVal->getType()))
5818     return nullptr;
5819 
5820   auto *MulInstr = dyn_cast<Instruction>(MulVal);
5821   if (!MulInstr)
5822     return nullptr;
5823   assert(MulInstr->getOpcode() == Instruction::Mul);
5824 
5825   auto *LHS = cast<ZExtInst>(MulInstr->getOperand(0)),
5826        *RHS = cast<ZExtInst>(MulInstr->getOperand(1));
5827   assert(LHS->getOpcode() == Instruction::ZExt);
5828   assert(RHS->getOpcode() == Instruction::ZExt);
5829   Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
5830 
5831   // Calculate type and width of the result produced by mul.with.overflow.
5832   Type *TyA = A->getType(), *TyB = B->getType();
5833   unsigned WidthA = TyA->getPrimitiveSizeInBits(),
5834            WidthB = TyB->getPrimitiveSizeInBits();
5835   unsigned MulWidth;
5836   Type *MulType;
5837   if (WidthB > WidthA) {
5838     MulWidth = WidthB;
5839     MulType = TyB;
5840   } else {
5841     MulWidth = WidthA;
5842     MulType = TyA;
5843   }
5844 
5845   // In order to replace the original mul with a narrower mul.with.overflow,
5846   // all uses must ignore upper bits of the product.  The number of used low
5847   // bits must not be greater than the width of mul.with.overflow.
5848   if (MulVal->hasNUsesOrMore(2))
5849     for (User *U : MulVal->users()) {
5850       if (U == &I)
5851         continue;
5852       if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
5853         // Check if truncation ignores bits above MulWidth.
5854         unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
5855         if (TruncWidth > MulWidth)
5856           return nullptr;
5857       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
5858         // Check if AND ignores bits above MulWidth.
5859         if (BO->getOpcode() != Instruction::And)
5860           return nullptr;
5861         if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
5862           const APInt &CVal = CI->getValue();
5863           if (CVal.getBitWidth() - CVal.countl_zero() > MulWidth)
5864             return nullptr;
5865         } else {
5866           // In this case we could have the operand of the binary operation
5867           // being defined in another block, and performing the replacement
5868           // could break the dominance relation.
5869           return nullptr;
5870         }
5871       } else {
5872         // Other uses prohibit this transformation.
5873         return nullptr;
5874       }
5875     }
5876 
5877   // Recognize patterns
5878   switch (I.getPredicate()) {
5879   case ICmpInst::ICMP_UGT: {
5880     // Recognize pattern:
5881     //   mulval = mul(zext A, zext B)
5882     //   cmp ugt mulval, max
5883     APInt MaxVal = APInt::getMaxValue(MulWidth);
5884     MaxVal = MaxVal.zext(OtherVal->getBitWidth());
5885     if (MaxVal.eq(*OtherVal))
5886       break; // Recognized
5887     return nullptr;
5888   }
5889 
5890   case ICmpInst::ICMP_ULT: {
5891     // Recognize pattern:
5892     //   mulval = mul(zext A, zext B)
5893     //   cmp ult mulval, max + 1
5894     APInt MaxVal = APInt::getOneBitSet(OtherVal->getBitWidth(), MulWidth);
5895     if (MaxVal.eq(*OtherVal))
5896       break; // Recognized
5897     return nullptr;
5898   }
5899 
5900   default:
5901     return nullptr;
5902   }
5903 
5904   InstCombiner::BuilderTy &Builder = IC.Builder;
5905   Builder.SetInsertPoint(MulInstr);
5906 
5907   // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
5908   Value *MulA = A, *MulB = B;
5909   if (WidthA < MulWidth)
5910     MulA = Builder.CreateZExt(A, MulType);
5911   if (WidthB < MulWidth)
5912     MulB = Builder.CreateZExt(B, MulType);
5913   Function *F = Intrinsic::getDeclaration(
5914       I.getModule(), Intrinsic::umul_with_overflow, MulType);
5915   CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
5916   IC.addToWorklist(MulInstr);
5917 
5918   // If there are uses of the mul result other than the comparison, we know
5919   // that they are truncations or binary ANDs. Change them to use the result
5920   // of mul.with.overflow and adjust the mask/size accordingly.
5921   if (MulVal->hasNUsesOrMore(2)) {
5922     Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
5923     for (User *U : make_early_inc_range(MulVal->users())) {
5924       if (U == &I)
5925         continue;
5926       if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
5927         if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
5928           IC.replaceInstUsesWith(*TI, Mul);
5929         else
5930           TI->setOperand(0, Mul);
5931       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
5932         assert(BO->getOpcode() == Instruction::And);
5933         // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
5934         ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
5935         APInt ShortMask = CI->getValue().trunc(MulWidth);
5936         Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
5937         Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
5938         IC.replaceInstUsesWith(*BO, Zext);
5939       } else {
5940         llvm_unreachable("Unexpected Binary operation");
5941       }
5942       IC.addToWorklist(cast<Instruction>(U));
5943     }
5944   }
5945 
5946   // The original icmp gets replaced with the overflow value, possibly
5947   // inverted depending on the predicate.
5948   if (I.getPredicate() == ICmpInst::ICMP_ULT) {
5949     Value *Res = Builder.CreateExtractValue(Call, 1);
5950     return BinaryOperator::CreateNot(Res);
5951   }
5952 
5953   return ExtractValueInst::Create(Call, 1);
5954 }
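// A sketch of the idiom handled above (value names are hypothetical):
//   %za  = zext i32 %a to i64
//   %zb  = zext i32 %b to i64
//   %mul = mul i64 %za, %zb
//   %ov  = icmp ugt i64 %mul, 4294967295
// becomes
//   %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
//   %ov  = extractvalue { i32, i1 } %res, 1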
5955 
5956 /// When performing a comparison against a constant, it is possible that not all
5957 /// the bits in the LHS are demanded. This helper method computes the mask that
5958 /// IS demanded.
5959 static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
5960   const APInt *RHS;
5961   if (!match(I.getOperand(1), m_APInt(RHS)))
5962     return APInt::getAllOnes(BitWidth);
5963 
5964   // If this is a normal comparison, it demands all bits. If it is a sign bit
5965   // comparison, it only demands the sign bit.
5966   bool UnusedBit;
5967   if (InstCombiner::isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
5968     return APInt::getSignMask(BitWidth);
5969 
5970   switch (I.getPredicate()) {
5971   // For a UGT comparison, we don't care about any bits that
5972   // correspond to the trailing ones of the comparand.  The value of these
5973   // bits doesn't impact the outcome of the comparison, because any value
5974   // greater than the RHS must differ in a bit higher than these due to carry.
5975   case ICmpInst::ICMP_UGT:
5976     return APInt::getBitsSetFrom(BitWidth, RHS->countr_one());
5977 
5978   // Similarly, for a ULT comparison, we don't care about the trailing zeros.
5979   // Any value less than the RHS must differ in a higher bit because of carries.
5980   case ICmpInst::ICMP_ULT:
5981     return APInt::getBitsSetFrom(BitWidth, RHS->countr_zero());
5982 
5983   default:
5984     return APInt::getAllOnes(BitWidth);
5985   }
5986 }
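// For example (a sketch): with the constant 7 (0b00000111),
//   icmp ugt i8 %x, 7
// demands only the mask 0xF8 of %x: the compare is true exactly when some
// bit above the three trailing ones of the constant is set, regardless of
// the low three bits.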
5987 
5988 /// Check that one use is in the same block as the definition and all
5989 /// other uses are in blocks dominated by a given block.
5990 ///
5991 /// \param DI Definition
5992 /// \param UI Use
5993 /// \param DB Block that must dominate all uses of \p DI outside
5994 ///           the parent block
5995 /// \return true when \p UI is the only use of \p DI in the parent block
5996 /// and all other uses of \p DI are in blocks dominated by \p DB.
5997 ///
5998 bool InstCombinerImpl::dominatesAllUses(const Instruction *DI,
5999                                         const Instruction *UI,
6000                                         const BasicBlock *DB) const {
6001   assert(DI && UI && "Instruction not defined\n");
6002   // Ignore incomplete definitions.
6003   if (!DI->getParent())
6004     return false;
6005   // DI and UI must be in the same block.
6006   if (DI->getParent() != UI->getParent())
6007     return false;
6008   // Protect from self-referencing blocks.
6009   if (DI->getParent() == DB)
6010     return false;
6011   for (const User *U : DI->users()) {
6012     auto *Usr = cast<Instruction>(U);
6013     if (Usr != UI && !DT.dominates(DB, Usr->getParent()))
6014       return false;
6015   }
6016   return true;
6017 }
6018 
6019 /// Return true when the instruction sequence within a block is select-cmp-br.
6020 static bool isChainSelectCmpBranch(const SelectInst *SI) {
6021   const BasicBlock *BB = SI->getParent();
6022   if (!BB)
6023     return false;
6024   auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
6025   if (!BI || BI->getNumSuccessors() != 2)
6026     return false;
6027   auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
6028   if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
6029     return false;
6030   return true;
6031 }
6032 
6033 /// True when a select result is replaced by one of its operands
6034 /// in a select-icmp sequence. This will eventually result in the elimination
6035 /// of the select.
6036 ///
6037 /// \param SI    Select instruction
6038 /// \param Icmp  Compare instruction
6039 /// \param SIOpd Operand that replaces the select
6040 ///
6041 /// Notes:
6042 /// - The replacement is global and requires dominator information
6043 /// - The caller is responsible for the actual replacement
6044 ///
6045 /// Example:
6046 ///
6047 /// entry:
6048 ///  %4 = select i1 %3, %C* %0, %C* null
6049 ///  %5 = icmp eq %C* %4, null
6050 ///  br i1 %5, label %9, label %7
6051 ///  ...
6052 ///  ; <label>:7                                       ; preds = %entry
6053 ///  %8 = getelementptr inbounds %C* %4, i64 0, i32 0
6054 ///  ...
6055 ///
6056 /// can be transformed to
6057 ///
6058 ///  %5 = icmp eq %C* %0, null
6059 ///  %6 = select i1 %3, i1 %5, i1 true
6060 ///  br i1 %6, label %9, label %7
6061 ///  ...
6062 ///  ; <label>:7                                       ; preds = %entry
6063 ///  %8 = getelementptr inbounds %C* %0, i64 0, i32 0  // replace by %0!
6064 ///
6065 /// Similar when the first operand of the select is a constant or/and
6066 /// the compare is for not equal rather than equal.
6067 ///
6068 /// NOTE: The function is only called when the select and compare constants
6069 /// are equal, so the optimization can work only for EQ predicates. This is
6070 /// not a major restriction since a NE compare should be 'normalized' to an
6071 /// equal compare, which usually happens in the combiner; the test case
6072 /// select-cmp-br.ll checks for it.
6073 bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI,
6074                                                  const ICmpInst *Icmp,
6075                                                  const unsigned SIOpd) {
6076   assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
6077   if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
6078     BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
6079     // The check for the single predecessor is not the best that can be
6080     // done. But it protects efficiently against cases like when SI's
6081     // home block has two successors, Succ and Succ1, and Succ1 is a
6082     // predecessor of Succ. Then SI can't be replaced by SIOpd because the
6083     // use that gets replaced can be reached on either path. So the
6084     // uniqueness check guarantees that the path containing all uses of SI
6085     // (outside SI's parent) is disjoint from all other paths out of SI.
6086     // But that information is more expensive to compute, and the trade-off
6087     // here is in favor of compile time. It should also be noted that we
6088     // check for a single predecessor and not just uniqueness; this handles
6089     // the situation where Succ and Succ1 point to the same basic block.
6090     if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
6091       NumSel++;
6092       SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
6093       return true;
6094     }
6095   }
6096   return false;
6097 }
6098 
6099 /// Try to fold the comparison based on range information we can get by checking
6100 /// whether bits are known to be zero or one in the inputs.
6101 Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
6102   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
6103   Type *Ty = Op0->getType();
6104   ICmpInst::Predicate Pred = I.getPredicate();
6105 
6106   // Get scalar or pointer size.
6107   unsigned BitWidth = Ty->isIntOrIntVectorTy()
6108                           ? Ty->getScalarSizeInBits()
6109                           : DL.getPointerTypeSizeInBits(Ty->getScalarType());
6110 
6111   if (!BitWidth)
6112     return nullptr;
6113 
6114   KnownBits Op0Known(BitWidth);
6115   KnownBits Op1Known(BitWidth);
6116 
6117   {
6118     // Don't use dominating conditions when folding icmp using known bits. This
6119     // may convert signed into unsigned predicates in ways that other passes
6120     // (especially IndVarSimplify) may not be able to reliably undo.
6121     SQ.DC = nullptr;
6122     auto _ = make_scope_exit([&]() { SQ.DC = &DC; });
6123     if (SimplifyDemandedBits(&I, 0, getDemandedBitsLHSMask(I, BitWidth),
6124                              Op0Known, 0))
6125       return &I;
6126 
6127     if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0))
6128       return &I;
6129   }
6130 
6131   // Given the known and unknown bits, compute a range that the LHS could be
6132   // in.  Compute the Min, Max and RHS values based on the known bits. For the
6133   // EQ and NE we use unsigned values.
6134   APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
6135   APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
6136   if (I.isSigned()) {
6137     Op0Min = Op0Known.getSignedMinValue();
6138     Op0Max = Op0Known.getSignedMaxValue();
6139     Op1Min = Op1Known.getSignedMinValue();
6140     Op1Max = Op1Known.getSignedMaxValue();
6141   } else {
6142     Op0Min = Op0Known.getMinValue();
6143     Op0Max = Op0Known.getMaxValue();
6144     Op1Min = Op1Known.getMinValue();
6145     Op1Max = Op1Known.getMaxValue();
6146   }
6147 
6148   // If Min and Max are known to be the same, then SimplifyDemandedBits figured
6149   // out that the LHS or RHS is a constant. Constant fold this now, so that
6150   // code below can assume that Min != Max.
6151   if (!isa<Constant>(Op0) && Op0Min == Op0Max)
6152     return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1);
6153   if (!isa<Constant>(Op1) && Op1Min == Op1Max)
6154     return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
6155 
6156   // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a
6157   // min/max canonical compare with some other compare. That could lead to
6158   // conflict with select canonicalization and infinite looping.
6159   // FIXME: This constraint may go away if min/max intrinsics are canonical.
6160   auto isMinMaxCmp = [&](Instruction &Cmp) {
6161     if (!Cmp.hasOneUse())
6162       return false;
6163     Value *A, *B;
6164     SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor;
6165     if (!SelectPatternResult::isMinOrMax(SPF))
6166       return false;
6167     return match(Op0, m_MaxOrMin(m_Value(), m_Value())) ||
6168            match(Op1, m_MaxOrMin(m_Value(), m_Value()));
6169   };
6170   if (!isMinMaxCmp(I)) {
6171     switch (Pred) {
6172     default:
6173       break;
6174     case ICmpInst::ICMP_ULT: {
6175       if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
6176         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
6177       const APInt *CmpC;
6178       if (match(Op1, m_APInt(CmpC))) {
6179         // A <u C -> A == C-1 if min(A)+1 == C
6180         if (*CmpC == Op0Min + 1)
6181           return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6182                               ConstantInt::get(Op1->getType(), *CmpC - 1));
6183         // X <u C --> X == 0, if the number of zero bits in the bottom of X
6184         // exceeds the log2 of C.
6185         if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
6186           return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6187                               Constant::getNullValue(Op1->getType()));
6188       }
6189       break;
6190     }
6191     case ICmpInst::ICMP_UGT: {
6192       if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
6193         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
6194       const APInt *CmpC;
6195       if (match(Op1, m_APInt(CmpC))) {
6196         // A >u C -> A == C+1 if max(A)-1 == C
6197         if (*CmpC == Op0Max - 1)
6198           return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6199                               ConstantInt::get(Op1->getType(), *CmpC + 1));
6200         // X >u C --> X != 0, if the number of zero bits in the bottom of X
6201         // exceeds the log2 of C.
6202         if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
6203           return new ICmpInst(ICmpInst::ICMP_NE, Op0,
6204                               Constant::getNullValue(Op1->getType()));
6205       }
6206       break;
6207     }
6208     case ICmpInst::ICMP_SLT: {
6209       if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
6210         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
6211       const APInt *CmpC;
6212       if (match(Op1, m_APInt(CmpC))) {
6213         if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
6214           return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6215                               ConstantInt::get(Op1->getType(), *CmpC - 1));
6216       }
6217       break;
6218     }
6219     case ICmpInst::ICMP_SGT: {
6220       if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
6221         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
6222       const APInt *CmpC;
6223       if (match(Op1, m_APInt(CmpC))) {
6224         if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
6225           return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6226                               ConstantInt::get(Op1->getType(), *CmpC + 1));
6227       }
6228       break;
6229     }
6230     }
6231   }
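  // A sketch of the "A <u C -> A == C-1" case above: if known bits show the
  // low bit of %a is always set (so min(%a) == 1), then
  //   icmp ult i8 %a, 2 --> icmp eq i8 %a, 1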
6232 
6233   // Based on the range information we know about the LHS, see if we can
6234   // simplify this comparison.  For example, (x&4) < 8 is always true.
6235   switch (Pred) {
6236   default:
6237     llvm_unreachable("Unknown icmp opcode!");
6238   case ICmpInst::ICMP_EQ:
6239   case ICmpInst::ICMP_NE: {
6240     if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
6241       return replaceInstUsesWith(
6242           I, ConstantInt::getBool(I.getType(), Pred == CmpInst::ICMP_NE));
6243 
6244     // If all bits are known zero except for one, then we know at most one bit
6245     // is set. If the comparison is against zero, then this is a check to see if
6246     // *that* bit is set.
6247     APInt Op0KnownZeroInverted = ~Op0Known.Zero;
6248     if (Op1Known.isZero()) {
6249       // If the LHS is an AND with the same constant, look through it.
6250       Value *LHS = nullptr;
6251       const APInt *LHSC;
6252       if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) ||
6253           *LHSC != Op0KnownZeroInverted)
6254         LHS = Op0;
6255 
6256       Value *X;
6257       const APInt *C1;
6258       if (match(LHS, m_Shl(m_Power2(C1), m_Value(X)))) {
6259         Type *XTy = X->getType();
6260         unsigned Log2C1 = C1->countr_zero();
6261         APInt C2 = Op0KnownZeroInverted;
6262         APInt C2Pow2 = (C2 & ~(*C1 - 1)) + *C1;
6263         if (C2Pow2.isPowerOf2()) {
6264           // iff C1 is a power of 2 and ((C2 & ~(C1-1)) + C1) is a power of 2:
6265           // ((C1 << X) & C2) == 0 -> X >= (Log2(C2+C1) - Log2(C1))
6266           // ((C1 << X) & C2) != 0 -> X  < (Log2(C2+C1) - Log2(C1))
6267           unsigned Log2C2 = C2Pow2.countr_zero();
6268           auto *CmpC = ConstantInt::get(XTy, Log2C2 - Log2C1);
6269           auto NewPred =
6270               Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
6271           return new ICmpInst(NewPred, X, CmpC);
6272         }
6273       }
6274     }
6275 
6276     // Op0 eq C_Pow2 -> Op0 ne 0 if Op0 is known to be C_Pow2 or zero.
6277     if (Op1Known.isConstant() && Op1Known.getConstant().isPowerOf2() &&
6278         (Op0Known & Op1Known) == Op0Known)
6279       return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0,
6280                           ConstantInt::getNullValue(Op1->getType()));
6281     break;
6282   }
6283   case ICmpInst::ICMP_ULT: {
6284     if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
6285       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6286     if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
6287       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6288     break;
6289   }
6290   case ICmpInst::ICMP_UGT: {
6291     if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
6292       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6293     if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
6294       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6295     break;
6296   }
6297   case ICmpInst::ICMP_SLT: {
6298     if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
6299       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6300     if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
6301       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6302     break;
6303   }
6304   case ICmpInst::ICMP_SGT: {
6305     if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
6306       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6307     if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
6308       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6309     break;
6310   }
6311   case ICmpInst::ICMP_SGE:
6312     assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
6313     if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
6314       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6315     if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
6316       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6317     if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B)
6318       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
6319     break;
6320   case ICmpInst::ICMP_SLE:
6321     assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
6322     if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
6323       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6324     if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
6325       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6326     if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B)
6327       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
6328     break;
6329   case ICmpInst::ICMP_UGE:
6330     assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
6331     if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
6332       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6333     if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
6334       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6335     if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B)
6336       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
6337     break;
6338   case ICmpInst::ICMP_ULE:
6339     assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
6340     if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
6341       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6342     if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
6343       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6344     if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B)
6345       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
6346     break;
6347   }
6348 
6349   // Turn a signed comparison into an unsigned one if both operands are known to
6350   // have the same sign.
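       // For example (an illustrative sketch with hypothetical values): if %a
       // and %b are both known non-negative, 'icmp slt i32 %a, %b' can be
       // rewritten as 'icmp ult i32 %a, %b'.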
6351   if (I.isSigned() &&
6352       ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
6353        (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
6354     return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
6355 
6356   return nullptr;
6357 }
6358 
6359 /// If one operand of an icmp is effectively a bool (value range of {0,1}),
6360 /// then try to reduce patterns based on that limit.
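     /// For example (an illustrative sketch with hypothetical values):
     ///   %z = zext i1 %y to i32
     ///   %c = icmp ult i32 %x, %z  ; only true when %x == 0 and %y is true
     /// can become '(icmp eq i32 %x, 0) & %y', as in the first fold below.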
6361 Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
6362   Value *X, *Y;
6363   ICmpInst::Predicate Pred;
6364 
6365   // X must be 0 and bool must be true for "ULT":
6366   // X <u (zext i1 Y) --> (X == 0) & Y
6367   if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_ZExt(m_Value(Y))))) &&
6368       Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULT)
6369     return BinaryOperator::CreateAnd(Builder.CreateIsNull(X), Y);
6370 
6371   // X must be 0 or bool must be true for "ULE":
6372   // X <=u (sext i1 Y) --> (X == 0) | Y
6373   if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_SExt(m_Value(Y))))) &&
6374       Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE)
6375     return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y);
6376 
6377   // icmp eq/ne X, (zext/sext (icmp eq/ne X, C))
6378   ICmpInst::Predicate Pred1, Pred2;
6379   const APInt *C;
6380   Instruction *ExtI;
6381   if (match(&I, m_c_ICmp(Pred1, m_Value(X),
6382                          m_CombineAnd(m_Instruction(ExtI),
6383                                       m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X),
6384                                                           m_APInt(C)))))) &&
6385       ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) {
6386     bool IsSExt = ExtI->getOpcode() == Instruction::SExt;
6387     bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse();
6388     auto CreateRangeCheck = [&] {
6389       Value *CmpV1 =
6390           Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType()));
6391       Value *CmpV2 = Builder.CreateICmp(
6392           Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1));
6393       return BinaryOperator::Create(
6394           Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And,
6395           CmpV1, CmpV2);
6396     };
6397     if (C->isZero()) {
6398       if (Pred2 == ICmpInst::ICMP_EQ) {
6399         // icmp eq X, (zext/sext (icmp eq X, 0)) --> false
6400         // icmp ne X, (zext/sext (icmp eq X, 0)) --> true
6401         return replaceInstUsesWith(
6402             I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
6403       } else if (!IsSExt || HasOneUse) {
6404         // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1
6405         // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1
6406         // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1
6407         // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X != -1
6408         return CreateRangeCheck();
6409       }
6410     } else if (IsSExt ? C->isAllOnes() : C->isOne()) {
6411       if (Pred2 == ICmpInst::ICMP_NE) {
6412         // icmp eq X, (zext (icmp ne X, 1)) --> false
6413         // icmp ne X, (zext (icmp ne X, 1)) --> true
6414         // icmp eq X, (sext (icmp ne X, -1)) --> false
6415         // icmp ne X, (sext (icmp ne X, -1)) --> true
6416         return replaceInstUsesWith(
6417             I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
6418       } else if (!IsSExt || HasOneUse) {
6419         // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1
6420         // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1
6421         // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1
6422         // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X != -1
6423         return CreateRangeCheck();
6424       }
6425     } else {
6426       // when C != 0 && C != 1:
6427       //   icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0
6428       //   icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1
6429       //   icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0
6430       //   icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1
6431       // when C != 0 && C != -1:
6432       //   icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0
6433       //   icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1
6434       //   icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0
6435       //   icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1
6436       return ICmpInst::Create(
6437           Instruction::ICmp, Pred1, X,
6438           ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE
6439                                                    ? (IsSExt ? -1 : 1)
6440                                                    : 0));
6441     }
6442   }
6443 
6444   return nullptr;
6445 }
6446 
6447 std::optional<std::pair<CmpInst::Predicate, Constant *>>
6448 InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
6449                                                        Constant *C) {
6450   assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
6451          "Only for relational integer predicates.");
6452 
6453   Type *Type = C->getType();
6454   bool IsSigned = ICmpInst::isSigned(Pred);
6455 
6456   CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
6457   bool WillIncrement =
6458       UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;
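       // Illustrative sketch: flipping strictness turns 'icmp sgt X, 5' into
       // 'icmp sge X, 6' (increment) and 'icmp sge X, 5' into 'icmp sgt X, 4'
       // (decrement).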
6459 
6460   // Check if the constant operand can be safely incremented/decremented
6461   // without overflowing/underflowing.
6462   auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
6463     return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
6464   };
6465 
6466   Constant *SafeReplacementConstant = nullptr;
6467   if (auto *CI = dyn_cast<ConstantInt>(C)) {
6468     // Bail out if the constant can't be safely incremented/decremented.
6469     if (!ConstantIsOk(CI))
6470       return std::nullopt;
6471   } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
6472     unsigned NumElts = FVTy->getNumElements();
6473     for (unsigned i = 0; i != NumElts; ++i) {
6474       Constant *Elt = C->getAggregateElement(i);
6475       if (!Elt)
6476         return std::nullopt;
6477 
6478       if (isa<UndefValue>(Elt))
6479         continue;
6480 
6481       // Bail out if we can't determine if this constant is min/max or if we
6482       // know that this constant is min/max.
6483       auto *CI = dyn_cast<ConstantInt>(Elt);
6484       if (!CI || !ConstantIsOk(CI))
6485         return std::nullopt;
6486 
6487       if (!SafeReplacementConstant)
6488         SafeReplacementConstant = CI;
6489     }
6490   } else {
6491     // ConstantExpr?
6492     return std::nullopt;
6493   }
6494 
6495   // It may not be safe to change a compare predicate in the presence of
6496   // undefined elements, so replace those elements with the first safe constant
6497   // that we found.
6498   // TODO: in case of poison, it is safe; let's replace undefs only.
6499   if (C->containsUndefOrPoisonElement()) {
6500     assert(SafeReplacementConstant && "Replacement constant not set");
6501     C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
6502   }
6503 
6504   CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);
6505 
6506   // Increment or decrement the constant.
6507   Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
6508   Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);
6509 
6510   return std::make_pair(NewPred, NewC);
6511 }
6512 
6513 /// If we have an icmp le or icmp ge instruction with a constant operand, turn
6514 /// it into the appropriate icmp lt or icmp gt instruction. This transform
6515 /// allows them to be folded in visitICmpInst.
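     /// For example (illustrative): 'icmp ule i32 %x, 7' becomes
     /// 'icmp ult i32 %x, 8', and 'icmp sge i32 %x, 7' becomes
     /// 'icmp sgt i32 %x, 6'.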
6516 static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
6517   ICmpInst::Predicate Pred = I.getPredicate();
6518   if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) ||
6519       InstCombiner::isCanonicalPredicate(Pred))
6520     return nullptr;
6521 
6522   Value *Op0 = I.getOperand(0);
6523   Value *Op1 = I.getOperand(1);
6524   auto *Op1C = dyn_cast<Constant>(Op1);
6525   if (!Op1C)
6526     return nullptr;
6527 
6528   auto FlippedStrictness =
6529       InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, Op1C);
6530   if (!FlippedStrictness)
6531     return nullptr;
6532 
6533   return new ICmpInst(FlippedStrictness->first, Op0, FlippedStrictness->second);
6534 }
6535 
6536 /// If we have a comparison with a non-canonical predicate and we can update
6537 /// all the users, invert the predicate and adjust all the users.
6538 CmpInst *InstCombinerImpl::canonicalizeICmpPredicate(CmpInst &I) {
6539   // Is the predicate already canonical?
6540   CmpInst::Predicate Pred = I.getPredicate();
6541   if (InstCombiner::isCanonicalPredicate(Pred))
6542     return nullptr;
6543 
6544   // Can all users be adjusted to predicate inversion?
6545   if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
6546     return nullptr;
6547 
6548   // Ok, we can canonicalize the comparison!
6549   // Let's first invert the comparison's predicate.
6550   I.setPredicate(CmpInst::getInversePredicate(Pred));
6551   I.setName(I.getName() + ".not");
6552 
6553   // And, adapt users.
6554   freelyInvertAllUsersOf(&I);
6555 
6556   return &I;
6557 }
6558 
6559 /// Integer compare with boolean values can always be turned into bitwise ops.
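     /// For example (illustrative): 'icmp ult i1 %a, %b' is only true when %a is
     /// false and %b is true, so it becomes 'and i1 (xor i1 %a, true), %b'.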
6560 static Instruction *canonicalizeICmpBool(ICmpInst &I,
6561                                          InstCombiner::BuilderTy &Builder) {
6562   Value *A = I.getOperand(0), *B = I.getOperand(1);
6563   assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only");
6564 
6565   // A boolean compared to true/false can be simplified to Op0/true/false in
6566   // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
6567   // Cases not handled by InstSimplify are always 'not' of Op0.
6568   if (match(B, m_Zero())) {
6569     switch (I.getPredicate()) {
6570       case CmpInst::ICMP_EQ:  // A ==   0 -> !A
6571       case CmpInst::ICMP_ULE: // A <=u  0 -> !A
6572       case CmpInst::ICMP_SGE: // A >=s  0 -> !A
6573         return BinaryOperator::CreateNot(A);
6574       default:
6575         llvm_unreachable("ICmp i1 X, C not simplified as expected.");
6576     }
6577   } else if (match(B, m_One())) {
6578     switch (I.getPredicate()) {
6579       case CmpInst::ICMP_NE:  // A !=  1 -> !A
6580       case CmpInst::ICMP_ULT: // A <u  1 -> !A
6581       case CmpInst::ICMP_SGT: // A >s -1 -> !A
6582         return BinaryOperator::CreateNot(A);
6583       default:
6584         llvm_unreachable("ICmp i1 X, C not simplified as expected.");
6585     }
6586   }
6587 
6588   switch (I.getPredicate()) {
6589   default:
6590     llvm_unreachable("Invalid icmp instruction!");
6591   case ICmpInst::ICMP_EQ:
6592     // icmp eq i1 A, B -> ~(A ^ B)
6593     return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
6594 
6595   case ICmpInst::ICMP_NE:
6596     // icmp ne i1 A, B -> A ^ B
6597     return BinaryOperator::CreateXor(A, B);
6598 
6599   case ICmpInst::ICMP_UGT:
6600     // icmp ugt -> icmp ult
6601     std::swap(A, B);
6602     [[fallthrough]];
6603   case ICmpInst::ICMP_ULT:
6604     // icmp ult i1 A, B -> ~A & B
6605     return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
6606 
6607   case ICmpInst::ICMP_SGT:
6608     // icmp sgt -> icmp slt
6609     std::swap(A, B);
6610     [[fallthrough]];
6611   case ICmpInst::ICMP_SLT:
6612     // icmp slt i1 A, B -> A & ~B
6613     return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
6614 
6615   case ICmpInst::ICMP_UGE:
6616     // icmp uge -> icmp ule
6617     std::swap(A, B);
6618     [[fallthrough]];
6619   case ICmpInst::ICMP_ULE:
6620     // icmp ule i1 A, B -> ~A | B
6621     return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
6622 
6623   case ICmpInst::ICMP_SGE:
6624     // icmp sge -> icmp sle
6625     std::swap(A, B);
6626     [[fallthrough]];
6627   case ICmpInst::ICMP_SLE:
6628     // icmp sle i1 A, B -> A | ~B
6629     return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
6630   }
6631 }
6632 
6633 // Transform pattern like:
6634 //   (1 << Y) u<= X  or  ~(-1 << Y) u<  X  or  ((1 << Y)+(-1)) u<  X
6635 //   (1 << Y) u>  X  or  ~(-1 << Y) u>= X  or  ((1 << Y)+(-1)) u>= X
6636 // Into:
6637 //   (X l>> Y) != 0
6638 //   (X l>> Y) == 0
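     // For example (an illustrative sketch with hypothetical values):
     //   %m = shl i32 1, %y
     //   %c = icmp ule i32 %m, %x
     // becomes
     //   %x.highbits = lshr i32 %x, %y
     //   %c = icmp ne i32 %x.highbits, 0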
6639 static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
6640                                             InstCombiner::BuilderTy &Builder) {
6641   ICmpInst::Predicate Pred, NewPred;
6642   Value *X, *Y;
6643   if (match(&Cmp,
6644             m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
6645     switch (Pred) {
6646     case ICmpInst::ICMP_ULE:
6647       NewPred = ICmpInst::ICMP_NE;
6648       break;
6649     case ICmpInst::ICMP_UGT:
6650       NewPred = ICmpInst::ICMP_EQ;
6651       break;
6652     default:
6653       return nullptr;
6654     }
6655   } else if (match(&Cmp, m_c_ICmp(Pred,
6656                                   m_OneUse(m_CombineOr(
6657                                       m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
6658                                       m_Add(m_Shl(m_One(), m_Value(Y)),
6659                                             m_AllOnes()))),
6660                                   m_Value(X)))) {
6661     // The variant with 'add' is not canonical (the variant with 'not' is);
6662     // we only match it because its extra uses prevent canonicalization.
6663 
6664     switch (Pred) {
6665     case ICmpInst::ICMP_ULT:
6666       NewPred = ICmpInst::ICMP_NE;
6667       break;
6668     case ICmpInst::ICMP_UGE:
6669       NewPred = ICmpInst::ICMP_EQ;
6670       break;
6671     default:
6672       return nullptr;
6673     }
6674   } else
6675     return nullptr;
6676 
6677   Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
6678   Constant *Zero = Constant::getNullValue(NewX->getType());
6679   return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
6680 }
6681 
6682 static Instruction *foldVectorCmp(CmpInst &Cmp,
6683                                   InstCombiner::BuilderTy &Builder) {
6684   const CmpInst::Predicate Pred = Cmp.getPredicate();
6685   Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
6686   Value *V1, *V2;
6687 
6688   auto createCmpReverse = [&](CmpInst::Predicate Pred, Value *X, Value *Y) {
6689     Value *V = Builder.CreateCmp(Pred, X, Y, Cmp.getName());
6690     if (auto *I = dyn_cast<Instruction>(V))
6691       I->copyIRFlags(&Cmp);
6692     Module *M = Cmp.getModule();
6693     Function *F = Intrinsic::getDeclaration(
6694         M, Intrinsic::experimental_vector_reverse, V->getType());
6695     return CallInst::Create(F, V);
6696   };
6697 
6698   if (match(LHS, m_VecReverse(m_Value(V1)))) {
6699     // cmp Pred, rev(V1), rev(V2) --> rev(cmp Pred, V1, V2)
6700     if (match(RHS, m_VecReverse(m_Value(V2))) &&
6701         (LHS->hasOneUse() || RHS->hasOneUse()))
6702       return createCmpReverse(Pred, V1, V2);
6703 
6704     // cmp Pred, rev(V1), RHSSplat --> rev(cmp Pred, V1, RHSSplat)
6705     if (LHS->hasOneUse() && isSplatValue(RHS))
6706       return createCmpReverse(Pred, V1, RHS);
6707   }
6708   // cmp Pred, LHSSplat, rev(V2) --> rev(cmp Pred, LHSSplat, V2)
6709   else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
6710     return createCmpReverse(Pred, LHS, V2);
6711 
6712   ArrayRef<int> M;
6713   if (!match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(M))))
6714     return nullptr;
6715 
6716   // If both arguments of the cmp are shuffles that use the same mask and
6717   // shuffle within a single vector, move the shuffle after the cmp:
6718   // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
6719   Type *V1Ty = V1->getType();
6720   if (match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(M))) &&
6721       V1Ty == V2->getType() && (LHS->hasOneUse() || RHS->hasOneUse())) {
6722     Value *NewCmp = Builder.CreateCmp(Pred, V1, V2);
6723     return new ShuffleVectorInst(NewCmp, M);
6724   }
6725 
6726   // Try to canonicalize compare with splatted operand and splat constant.
6727   // TODO: We could generalize this for more than splats. See/use the code in
6728   //       InstCombiner::foldVectorBinop().
6729   Constant *C;
6730   if (!LHS->hasOneUse() || !match(RHS, m_Constant(C)))
6731     return nullptr;
6732 
6733   // Length-changing splats are ok, so adjust the constants as needed:
6734   // cmp (shuffle V1, M), C --> shuffle (cmp V1, C'), M
6735   Constant *ScalarC = C->getSplatValue(/* AllowUndefs */ true);
6736   int MaskSplatIndex;
6737   if (ScalarC && match(M, m_SplatOrUndefMask(MaskSplatIndex))) {
6738     // We allow undefs in matching, but this transform removes those for safety.
6739     // Demanded elements analysis should be able to recover some/all of that.
6740     C = ConstantVector::getSplat(cast<VectorType>(V1Ty)->getElementCount(),
6741                                  ScalarC);
6742     SmallVector<int, 8> NewM(M.size(), MaskSplatIndex);
6743     Value *NewCmp = Builder.CreateCmp(Pred, V1, C);
6744     return new ShuffleVectorInst(NewCmp, NewM);
6745   }
6746 
6747   return nullptr;
6748 }
6749 
6750 // extract(uadd.with.overflow(A, B), 0) ult A
6751 //  -> extract(uadd.with.overflow(A, B), 1)
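     // For example (an illustrative sketch with hypothetical values):
     //   %agg = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
     //   %sum = extractvalue { i32, i1 } %agg, 0
     //   %wrapped = icmp ult i32 %sum, %a
     // becomes
     //   %wrapped = extractvalue { i32, i1 } %agg, 1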
6752 static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
6753   CmpInst::Predicate Pred = I.getPredicate();
6754   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
6755 
6756   Value *UAddOv;
6757   Value *A, *B;
6758   auto UAddOvResultPat = m_ExtractValue<0>(
6759       m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B)));
6760   if (match(Op0, UAddOvResultPat) &&
6761       ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) ||
6762        (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) &&
6763         (match(A, m_One()) || match(B, m_One()))) ||
6764        (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) &&
6765         (match(A, m_AllOnes()) || match(B, m_AllOnes())))))
6766     // extract(uadd.with.overflow(A, B), 0) < A
6767     // extract(uadd.with.overflow(A, 1), 0) == 0
6768     // extract(uadd.with.overflow(A, -1), 0) != -1
6769     UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand();
6770   else if (match(Op1, UAddOvResultPat) &&
6771            Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B))
6772     // A > extract(uadd.with.overflow(A, B), 0)
6773     UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand();
6774   else
6775     return nullptr;
6776 
6777   return ExtractValueInst::Create(UAddOv, 1);
6778 }
6779 
6780 static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
6781   if (!I.getOperand(0)->getType()->isPointerTy() ||
6782       NullPointerIsDefined(
6783           I.getParent()->getParent(),
6784           I.getOperand(0)->getType()->getPointerAddressSpace())) {
6785     return nullptr;
6786   }
6787   Instruction *Op;
6788   if (match(I.getOperand(0), m_Instruction(Op)) &&
6789       match(I.getOperand(1), m_Zero()) &&
6790       Op->isLaunderOrStripInvariantGroup()) {
6791     return ICmpInst::Create(Instruction::ICmp, I.getPredicate(),
6792                             Op->getOperand(0), I.getOperand(1));
6793   }
6794   return nullptr;
6795 }
6796 
6797 /// This function folds patterns produced by lowering of reduce idioms, such as
6798 /// llvm.vector.reduce.and, which are lowered into instruction chains. This
6799 /// code attempts to generate a scalar comparison instead of a vector
6800 /// comparison when possible.
6801 static Instruction *foldReductionIdiom(ICmpInst &I,
6802                                        InstCombiner::BuilderTy &Builder,
6803                                        const DataLayout &DL) {
6804   if (I.getType()->isVectorTy())
6805     return nullptr;
6806   ICmpInst::Predicate OuterPred, InnerPred;
6807   Value *LHS, *RHS;
6808 
6809   // Match lowering of @llvm.vector.reduce.and. Turn
6810   //   %vec_ne = icmp ne <8 x i8> %lhs, %rhs
6811   //   %scalar_ne = bitcast <8 x i1> %vec_ne to i8
6812   //   %res = icmp <pred> i8 %scalar_ne, 0
6813   //
6814   // into
6815   //
6816   //   %lhs.scalar = bitcast <8 x i8> %lhs to i64
6817   //   %rhs.scalar = bitcast <8 x i8> %rhs to i64
6818   //   %res = icmp <pred> i64 %lhs.scalar, %rhs.scalar
6819   //
6820   // for <pred> in {ne, eq}.
6821   if (!match(&I, m_ICmp(OuterPred,
6822                         m_OneUse(m_BitCast(m_OneUse(
6823                             m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))),
6824                         m_Zero())))
6825     return nullptr;
6826   auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType());
6827   if (!LHSTy || !LHSTy->getElementType()->isIntegerTy())
6828     return nullptr;
6829   unsigned NumBits =
6830       LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth();
6831   // TODO: Relax this to "not wider than max legal integer type"?
6832   if (!DL.isLegalInteger(NumBits))
6833     return nullptr;
6834 
6835   if (ICmpInst::isEquality(OuterPred) && InnerPred == ICmpInst::ICMP_NE) {
6836     auto *ScalarTy = Builder.getIntNTy(NumBits);
6837     LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar");
6838     RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar");
6839     return ICmpInst::Create(Instruction::ICmp, OuterPred, LHS, RHS,
6840                             I.getName());
6841   }
6842 
6843   return nullptr;
6844 }
6845 
6846 Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
6847   bool Changed = false;
6848   const SimplifyQuery Q = SQ.getWithInstruction(&I);
6849   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
6850   unsigned Op0Cplxity = getComplexity(Op0);
6851   unsigned Op1Cplxity = getComplexity(Op1);
6852 
6853   /// Orders the operands of the compare so that they are listed from most
6854   /// complex to least complex. Complexity increases from constants to unary
6855   /// operators to binary operators, so constants end up on the RHS.
6856   if (Op0Cplxity < Op1Cplxity) {
6857     I.swapOperands();
6858     std::swap(Op0, Op1);
6859     Changed = true;
6860   }
6861 
6862   if (Value *V = simplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
6863     return replaceInstUsesWith(I, V);
6864 
6865   // Comparing -val or val against zero for inequality is the same as just
6866   // comparing val, i.e., abs(val) != 0 -> val != 0
6867   if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) {
6868     Value *Cond, *SelectTrue, *SelectFalse;
6869     if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
6870                             m_Value(SelectFalse)))) {
6871       if (Value *V = dyn_castNegVal(SelectTrue)) {
6872         if (V == SelectFalse)
6873           return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
6874       }
6875       else if (Value *V = dyn_castNegVal(SelectFalse)) {
6876         if (V == SelectTrue)
6877           return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
6878       }
6879     }
6880   }
6881 
6882   if (Op0->getType()->isIntOrIntVectorTy(1))
6883     if (Instruction *Res = canonicalizeICmpBool(I, Builder))
6884       return Res;
6885 
6886   if (Instruction *Res = canonicalizeCmpWithConstant(I))
6887     return Res;
6888 
6889   if (Instruction *Res = canonicalizeICmpPredicate(I))
6890     return Res;
6891 
6892   if (Instruction *Res = foldICmpWithConstant(I))
6893     return Res;
6894 
6895   if (Instruction *Res = foldICmpWithDominatingICmp(I))
6896     return Res;
6897 
6898   if (Instruction *Res = foldICmpUsingBoolRange(I))
6899     return Res;
6900 
6901   if (Instruction *Res = foldICmpUsingKnownBits(I))
6902     return Res;
6903 
6904   if (Instruction *Res = foldICmpTruncWithTruncOrExt(I, Q))
6905     return Res;
6906 
6907   // Test if the ICmpInst instruction is used exclusively by a select as
6908   // part of a minimum or maximum operation. If so, refrain from doing
6909   // any other folding. This helps out other analyses which understand
6910   // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
6911   // and CodeGen. And in this case, at least one of the comparison
6912   // operands has at least one user besides the compare (the select),
6913   // which would often largely negate the benefit of folding anyway.
6914   //
6915   // Do the same for the other patterns recognized by matchSelectPattern.
6916   if (I.hasOneUse())
6917     if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
6918       Value *A, *B;
6919       SelectPatternResult SPR = matchSelectPattern(SI, A, B);
6920       if (SPR.Flavor != SPF_UNKNOWN)
6921         return nullptr;
6922     }
6923 
6924   // Do this after checking for min/max to prevent infinite looping.
6925   if (Instruction *Res = foldICmpWithZero(I))
6926     return Res;
6927 
6928   // FIXME: We only do this after checking for min/max to prevent infinite
6929   // looping caused by a reverse canonicalization of these patterns for min/max.
6930   // FIXME: The organization of folds is a mess. These would naturally go into
6931   // canonicalizeCmpWithConstant(), but we can't move all of the above folds
6932   // down here after the min/max restriction.
6933   ICmpInst::Predicate Pred = I.getPredicate();
6934   const APInt *C;
6935   if (match(Op1, m_APInt(C))) {
6936     // For i32: x >u 2147483647 -> x <s 0  -> true if sign bit set
6937     if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
6938       Constant *Zero = Constant::getNullValue(Op0->getType());
6939       return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
6940     }
6941 
6942     // For i32: x <u 2147483648 -> x >s -1  -> true if sign bit clear
6943     if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
6944       Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
6945       return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
6946     }
6947   }
6948 
6949   // The folds in here may rely on wrapping flags and special constants, so
6950   // they can break up min/max idioms in some cases but not seemingly similar
6951   // patterns.
6952   // FIXME: It may be possible to enhance select folding to make this
6953   //        unnecessary. It may also be moot if we canonicalize to min/max
6954   //        intrinsics.
6955   if (Instruction *Res = foldICmpBinOp(I, Q))
6956     return Res;
6957 
6958   if (Instruction *Res = foldICmpInstWithConstant(I))
6959     return Res;
6960 
6961   // Try to match comparison as a sign bit test. Intentionally do this after
6962   // foldICmpInstWithConstant() to potentially let other folds happen first.
6963   if (Instruction *New = foldSignBitTest(I))
6964     return New;
6965 
6966   if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
6967     return Res;
6968 
6969   // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'.
6970   if (auto *GEP = dyn_cast<GEPOperator>(Op0))
6971     if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
6972       return NI;
6973   if (auto *GEP = dyn_cast<GEPOperator>(Op1))
6974     if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I))
6975       return NI;
6976 
6977   if (auto *SI = dyn_cast<SelectInst>(Op0))
6978     if (Instruction *NI = foldSelectICmp(I.getPredicate(), SI, Op1, I))
6979       return NI;
6980   if (auto *SI = dyn_cast<SelectInst>(Op1))
6981     if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I))
6982       return NI;
6983 
6984   // In case of a comparison with two select instructions having the same
6985   // condition, check whether one of the resulting branches can be simplified.
6986   // If so, just compare the other branch and select the appropriate result.
6987   // For example:
6988   //   %tmp1 = select i1 %cmp, i32 %y, i32 %x
6989   //   %tmp2 = select i1 %cmp, i32 %z, i32 %x
6990   //   %cmp2 = icmp slt i32 %tmp2, %tmp1
6991   // The icmp yields false for the false values of the selects, and the
6992   // result depends on the comparison of the true values of the selects if
6993   // %cmp is true. Thus, transform this into:
6994   //   %cmp3 = icmp slt i32 %z, %y
6995   //   %sel = select i1 %cmp, i1 %cmp3, i1 false
6996   // Similar cases are handled by this transform.
6997   {
6998     Value *Cond, *A, *B, *C, *D;
6999     if (match(Op0, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
7000         match(Op1, m_Select(m_Specific(Cond), m_Value(C), m_Value(D))) &&
7001         (Op0->hasOneUse() || Op1->hasOneUse())) {
7002       // Check whether comparison of TrueValues can be simplified
7003       if (Value *Res = simplifyICmpInst(Pred, A, C, SQ)) {
7004         Value *NewICMP = Builder.CreateICmp(Pred, B, D);
7005         return SelectInst::Create(Cond, Res, NewICMP);
7006       }
7007       // Check whether comparison of FalseValues can be simplified
7008       if (Value *Res = simplifyICmpInst(Pred, B, D, SQ)) {
7009         Value *NewICMP = Builder.CreateICmp(Pred, A, C);
7010         return SelectInst::Create(Cond, NewICMP, Res);
7011       }
7012     }
7013   }
7014 
7015   // Try to optimize equality comparisons against alloca-based pointers.
7016   if (Op0->getType()->isPointerTy() && I.isEquality()) {
7017     assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
7018     if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0)))
7019       if (foldAllocaCmp(Alloca))
7020         return nullptr;
7021     if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1)))
7022       if (foldAllocaCmp(Alloca))
7023         return nullptr;
7024   }
7025 
7026   if (Instruction *Res = foldICmpBitCast(I))
7027     return Res;
7028 
7029   // TODO: Hoist this above the min/max bailout.
7030   if (Instruction *R = foldICmpWithCastOp(I))
7031     return R;
7032 
7033   if (Instruction *Res = foldICmpWithMinMax(I))
7034     return Res;
7035 
7036   {
7037     Value *X, *Y;
7038     // Transform (X & ~Y) == 0 --> (X & Y) != 0
7039     // and       (X & ~Y) != 0 --> (X & Y) == 0
7040     // if X is a power of 2.
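         // For example (illustrative, with %x known to be a power of 2):
         //   (and %x, (xor %y, -1)) == 0  -->  (and %x, %y) != 0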
7041     if (match(Op0, m_And(m_Value(X), m_Not(m_Value(Y)))) &&
7042         match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(X, false, 0, &I) &&
7043         I.isEquality())
7044       return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(X, Y),
7045                           Op1);
7046 
7047     // Op0 pred Op1 -> ~Op1 pred ~Op0, if this allows us to drop an instruction.
7048     if (Op0->getType()->isIntOrIntVectorTy()) {
7049       bool ConsumesOp0, ConsumesOp1;
7050       if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
7051           isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
7052           (ConsumesOp0 || ConsumesOp1)) {
7053         Value *InvOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
7054         Value *InvOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
7055         assert(InvOp0 && InvOp1 &&
7056                "Mismatch between isFreeToInvert and getFreelyInverted");
7057         return new ICmpInst(I.getSwappedPredicate(), InvOp0, InvOp1);
7058       }
7059     }
7060 
7061     Instruction *AddI = nullptr;
7062     if (match(&I, m_UAddWithOverflow(m_Value(X), m_Value(Y),
7063                                      m_Instruction(AddI))) &&
7064         isa<IntegerType>(X->getType())) {
7065       Value *Result;
7066       Constant *Overflow;
7067       // m_UAddWithOverflow can match patterns that do not include an explicit
7068       // "add" instruction, so check the opcode of the matched op.
7069       if (AddI->getOpcode() == Instruction::Add &&
7070           OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, X, Y, *AddI,
7071                                 Result, Overflow)) {
7072         replaceInstUsesWith(*AddI, Result);
7073         eraseInstFromFunction(*AddI);
7074         return replaceInstUsesWith(I, Overflow);
7075       }
7076     }
7077 
7078     // (zext X) * (zext Y)  --> llvm.umul.with.overflow.
7079     if (match(Op0, m_NUWMul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
7080         match(Op1, m_APInt(C))) {
7081       if (Instruction *R = processUMulZExtIdiom(I, Op0, C, *this))
7082         return R;
7083     }
7084 
7085     // Signbit test folds
7086     // Fold (X u>> BitWidth - 1 Pred ZExt(i1))  -->  X s< 0 Pred i1
7087     // Fold (X s>> BitWidth - 1 Pred SExt(i1))  -->  X s< 0 Pred i1
7088     Instruction *ExtI;
7089     if ((I.isUnsigned() || I.isEquality()) &&
7090         match(Op1,
7091               m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(Y)))) &&
7092         Y->getType()->getScalarSizeInBits() == 1 &&
7093         (Op0->hasOneUse() || Op1->hasOneUse())) {
7094       unsigned OpWidth = Op0->getType()->getScalarSizeInBits();
7095       Instruction *ShiftI;
7096       if (match(Op0, m_CombineAnd(m_Instruction(ShiftI),
7097                                   m_Shr(m_Value(X), m_SpecificIntAllowUndef(
7098                                                         OpWidth - 1))))) {
7099         unsigned ExtOpc = ExtI->getOpcode();
7100         unsigned ShiftOpc = ShiftI->getOpcode();
7101         if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) ||
7102             (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) {
7103           Value *SLTZero =
7104               Builder.CreateICmpSLT(X, Constant::getNullValue(X->getType()));
7105           Value *Cmp = Builder.CreateICmp(Pred, SLTZero, Y, I.getName());
7106           return replaceInstUsesWith(I, Cmp);
7107         }
7108       }
7109     }
7110   }
7111 
7112   if (Instruction *Res = foldICmpEquality(I))
7113     return Res;
7114 
7115   if (Instruction *Res = foldICmpPow2Test(I, Builder))
7116     return Res;
7117 
7118   if (Instruction *Res = foldICmpOfUAddOv(I))
7119     return Res;
7120 
7121   // The 'cmpxchg' instruction returns an aggregate containing the old value and
7122   // an i1 which indicates whether or not we successfully did the swap.
7123   //
7124   // Replace comparisons between the old value and the expected value with the
7125   // indicator that 'cmpxchg' returns.
7126   //
7127   // N.B.  This transform is only valid when the 'cmpxchg' is not permitted to
7128   // spuriously fail.  In those cases, the old value may equal the expected
7129   // value but it is possible for the swap to not occur.
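       // For example (an illustrative sketch with hypothetical values):
       //   %pair = cmpxchg ptr %p, i32 %expected, i32 %new seq_cst seq_cst
       //   %old = extractvalue { i32, i1 } %pair, 0
       //   %ok = icmp eq i32 %old, %expected
       // becomes (for a non-weak cmpxchg)
       //   %ok = extractvalue { i32, i1 } %pair, 1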
7130   if (I.getPredicate() == ICmpInst::ICMP_EQ)
7131     if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
7132       if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
7133         if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
7134             !ACXI->isWeak())
7135           return ExtractValueInst::Create(ACXI, 1);
7136 
7137   {
7138     Value *X;
7139     const APInt *C;
7140     // icmp X+Cst, X
7141     if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
7142       return foldICmpAddOpConst(X, *C, I.getPredicate());
7143 
7144     // icmp X, X+Cst
7145     if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
7146       return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
7147   }
7148 
7149   if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
7150     return Res;
7151 
7152   if (I.getType()->isVectorTy())
7153     if (Instruction *Res = foldVectorCmp(I, Builder))
7154       return Res;
7155 
7156   if (Instruction *Res = foldICmpInvariantGroup(I))
7157     return Res;
7158 
7159   if (Instruction *Res = foldReductionIdiom(I, Builder, DL))
7160     return Res;
7161 
7162   return Changed ? &I : nullptr;
7163 }
7164 
7165 /// Fold fcmp ([us]itofp x, cst) if possible.
7166 Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I,
7167                                                     Instruction *LHSI,
7168                                                     Constant *RHSC) {
7169   if (!isa<ConstantFP>(RHSC)) return nullptr;
7170   const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
7171 
7172   // Get the width of the mantissa.  We don't want to hack on conversions that
7173   // might lose information from the integer, e.g. "i64 -> float"
7174   int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
7175   if (MantissaWidth == -1) return nullptr;  // Unknown.
7176 
7177   IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
7178 
7179   bool LHSUnsigned = isa<UIToFPInst>(LHSI);
7180 
7181   if (I.isEquality()) {
7182     FCmpInst::Predicate P = I.getPredicate();
7183     bool IsExact = false;
7184     APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
7185     RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);
7186 
7187     // If the floating point constant isn't an integer value, the result of
7188     // an equality / inequality comparison against it is known up front.
7189     if (!IsExact) {
7190       // TODO: Can never be -0.0 and other non-representable values
7191       APFloat RHSRoundInt(RHS);
7192       RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
7193       if (RHS != RHSRoundInt) {
7194         if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
7195           return replaceInstUsesWith(I, Builder.getFalse());
7196 
7197         assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
7198         return replaceInstUsesWith(I, Builder.getTrue());
7199       }
7200     }
7201 
7202     // TODO: If the constant is exactly representable, is it always OK to do
7203     // equality compares as integer?
7204   }
7205 
7206   // Check to see that the input is converted from an integer type that is
7207   // small enough to preserve all bits. TODO: check here for "known" sign bits.
7208   // This would allow us to handle (sitofp (x >>s 62) to float), e.g., if x is i64.
7209   unsigned InputSize = IntTy->getScalarSizeInBits();
7210 
7211   // The following test does NOT adjust InputSize downwards for signed inputs,
7212   // because the most negative value still requires all the mantissa bits
7213   // to distinguish it from one less than that value.
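       // Illustrative sketch: for 'sitofp i32 %x to float', the mantissa holds
       // 24 bits but the input has 32, so a constant whose exponent falls in
       // [24, 31] could be rounded and change the comparison result.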
7214   if ((int)InputSize > MantissaWidth) {
7215     // Conversion would lose accuracy. Check if loss can impact comparison.
7216     int Exp = ilogb(RHS);
7217     if (Exp == APFloat::IEK_Inf) {
7218       int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
7219       if (MaxExponent < (int)InputSize - !LHSUnsigned)
7220         // Conversion could create infinity.
7221         return nullptr;
7222     } else {
7223       // Note that if RHS is zero or NaN, then Exp is negative
7224       // and first condition is trivially false.
7225       if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
7226         // Conversion could affect comparison.
7227         return nullptr;
7228     }
7229   }
7230 
7231   // Otherwise, we can potentially simplify the comparison.  We know that it
7232   // will always come through as an integer value and we know the constant is
7233   // not a NAN (it would have been previously simplified).
7234   assert(!RHS.isNaN() && "NaN comparison not already folded!");
7235 
7236   ICmpInst::Predicate Pred;
7237   switch (I.getPredicate()) {
7238   default: llvm_unreachable("Unexpected predicate!");
7239   case FCmpInst::FCMP_UEQ:
7240   case FCmpInst::FCMP_OEQ:
7241     Pred = ICmpInst::ICMP_EQ;
7242     break;
7243   case FCmpInst::FCMP_UGT:
7244   case FCmpInst::FCMP_OGT:
7245     Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
7246     break;
7247   case FCmpInst::FCMP_UGE:
7248   case FCmpInst::FCMP_OGE:
7249     Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
7250     break;
7251   case FCmpInst::FCMP_ULT:
7252   case FCmpInst::FCMP_OLT:
7253     Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
7254     break;
7255   case FCmpInst::FCMP_ULE:
7256   case FCmpInst::FCMP_OLE:
7257     Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
7258     break;
7259   case FCmpInst::FCMP_UNE:
7260   case FCmpInst::FCMP_ONE:
7261     Pred = ICmpInst::ICMP_NE;
7262     break;
7263   case FCmpInst::FCMP_ORD:
7264     return replaceInstUsesWith(I, Builder.getTrue());
7265   case FCmpInst::FCMP_UNO:
7266     return replaceInstUsesWith(I, Builder.getFalse());
7267   }
7268 
7269   // Now we know that the APFloat is a normal number, zero or inf.
7270 
7271   // See if the FP constant is too large for the integer.  For example,
7272   // comparing an i8 to 300.0.
7273   unsigned IntWidth = IntTy->getScalarSizeInBits();
7274 
7275   if (!LHSUnsigned) {
7276     // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
7277     // and large values.
7278     APFloat SMax(RHS.getSemantics());
7279     SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
7280                           APFloat::rmNearestTiesToEven);
7281     if (SMax < RHS) { // smax < 13123.0
7282       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_SLT ||
7283           Pred == ICmpInst::ICMP_SLE)
7284         return replaceInstUsesWith(I, Builder.getTrue());
7285       return replaceInstUsesWith(I, Builder.getFalse());
7286     }
7287   } else {
7288     // If the RHS value is > UnsignedMax, fold the comparison. This handles
7289     // +INF and large values.
7290     APFloat UMax(RHS.getSemantics());
7291     UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
7292                           APFloat::rmNearestTiesToEven);
7293     if (UMax < RHS) { // umax < 13123.0
7294       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||
7295           Pred == ICmpInst::ICMP_ULE)
7296         return replaceInstUsesWith(I, Builder.getTrue());
7297       return replaceInstUsesWith(I, Builder.getFalse());
7298     }
7299   }
7300 
7301   if (!LHSUnsigned) {
7302     // See if the RHS value is < SignedMin.
7303     APFloat SMin(RHS.getSemantics());
7304     SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
7305                           APFloat::rmNearestTiesToEven);
7306     if (SMin > RHS) { // smin > 12312.0
7307       if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
7308           Pred == ICmpInst::ICMP_SGE)
7309         return replaceInstUsesWith(I, Builder.getTrue());
7310       return replaceInstUsesWith(I, Builder.getFalse());
7311     }
7312   } else {
7313     // See if the RHS value is < UnsignedMin.
7314     APFloat UMin(RHS.getSemantics());
7315     UMin.convertFromAPInt(APInt::getMinValue(IntWidth), false,
7316                           APFloat::rmNearestTiesToEven);
7317     if (UMin > RHS) { // umin > 12312.0
7318       if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
7319           Pred == ICmpInst::ICMP_UGE)
7320         return replaceInstUsesWith(I, Builder.getTrue());
7321       return replaceInstUsesWith(I, Builder.getFalse());
7322     }
7323   }
7324 
7325   // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
7326   // [0, UMAX], but it may still be fractional. Check whether this is the case
7327   // using the IsExact flag.
7328   // Don't do this for zero, because -0.0 is not fractional.
7329   APSInt RHSInt(IntWidth, LHSUnsigned);
7330   bool IsExact;
7331   RHS.convertToInteger(RHSInt, APFloat::rmTowardZero, &IsExact);
7332   if (!RHS.isZero()) {
7333     if (!IsExact) {
7334       // If we had a comparison against a fractional value, we have to adjust
7335       // the compare predicate and sometimes the value.  RHSInt is rounded
7336       // towards zero at this point.
7337       switch (Pred) {
7338       default: llvm_unreachable("Unexpected integer comparison!");
7339       case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
7340         return replaceInstUsesWith(I, Builder.getTrue());
7341       case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
7342         return replaceInstUsesWith(I, Builder.getFalse());
7343       case ICmpInst::ICMP_ULE:
7344         // (float)int <= 4.4   --> int <= 4
7345         // (float)int <= -4.4  --> false
7346         if (RHS.isNegative())
7347           return replaceInstUsesWith(I, Builder.getFalse());
7348         break;
7349       case ICmpInst::ICMP_SLE:
7350         // (float)int <= 4.4   --> int <= 4
7351         // (float)int <= -4.4  --> int < -4
7352         if (RHS.isNegative())
7353           Pred = ICmpInst::ICMP_SLT;
7354         break;
7355       case ICmpInst::ICMP_ULT:
7356         // (float)int < -4.4   --> false
7357         // (float)int < 4.4    --> int <= 4
7358         if (RHS.isNegative())
7359           return replaceInstUsesWith(I, Builder.getFalse());
7360         Pred = ICmpInst::ICMP_ULE;
7361         break;
7362       case ICmpInst::ICMP_SLT:
7363         // (float)int < -4.4   --> int < -4
7364         // (float)int < 4.4    --> int <= 4
7365         if (!RHS.isNegative())
7366           Pred = ICmpInst::ICMP_SLE;
7367         break;
7368       case ICmpInst::ICMP_UGT:
7369         // (float)int > 4.4    --> int > 4
7370         // (float)int > -4.4   --> true
7371         if (RHS.isNegative())
7372           return replaceInstUsesWith(I, Builder.getTrue());
7373         break;
7374       case ICmpInst::ICMP_SGT:
7375         // (float)int > 4.4    --> int > 4
7376         // (float)int > -4.4   --> int >= -4
7377         if (RHS.isNegative())
7378           Pred = ICmpInst::ICMP_SGE;
7379         break;
7380       case ICmpInst::ICMP_UGE:
7381         // (float)int >= -4.4   --> true
7382         // (float)int >= 4.4    --> int > 4
7383         if (RHS.isNegative())
7384           return replaceInstUsesWith(I, Builder.getTrue());
7385         Pred = ICmpInst::ICMP_UGT;
7386         break;
7387       case ICmpInst::ICMP_SGE:
7388         // (float)int >= -4.4   --> int >= -4
7389         // (float)int >= 4.4    --> int > 4
7390         if (!RHS.isNegative())
7391           Pred = ICmpInst::ICMP_SGT;
7392         break;
7393       }
7394     }
7395   }
7396 
7397   // Lower this FP comparison into an appropriate integer version of the
7398   // comparison.
7399   return new ICmpInst(Pred, LHSI->getOperand(0), Builder.getInt(RHSInt));
7400 }
7401 
7402 /// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
7403 static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
7404                                               Constant *RHSC) {
7405   // When C is not 0.0 and infinities are not allowed:
7406   // (C / X) < 0.0 is a sign-bit test of X
7407   // (C / X) < 0.0 --> X < 0.0 (if C is positive)
7408   // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate)
7409   //
7410   // Proof:
7411   // Multiply (C / X) < 0.0 by X * X / C.
7412   // - X is non-zero; if it were zero, the 'ninf' flag would be violated.
7413   // - C defines the sign of X * X / C. Thus it also defines whether to swap
7414   //   the predicate. C is also non-zero by definition.
7415   //
7416   // Thus X * X / C is non-zero and the transformation is valid. [qed]
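     // For example (an illustrative sketch with hypothetical values):
     //   %d = fdiv ninf float 2.0, %x
     //   %c = fcmp ninf olt float %d, 0.0
     // becomes
     //   %c = fcmp olt float %x, 0.0
     // (with a negative dividend, the predicate would be swapped to ogt).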
7417 
7418   FCmpInst::Predicate Pred = I.getPredicate();
7419 
7420   // Check that predicates are valid.
7421   if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
7422       (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
7423     return nullptr;
7424 
7425   // Check that RHS operand is zero.
7426   if (!match(RHSC, m_AnyZeroFP()))
7427     return nullptr;
7428 
7429   // Check fastmath flags ('ninf').
7430   if (!LHSI->hasNoInfs() || !I.hasNoInfs())
7431     return nullptr;
7432 
7433   // Check the properties of the dividend. It must not be zero to avoid a
7434   // division by zero (see Proof).
7435   const APFloat *C;
7436   if (!match(LHSI->getOperand(0), m_APFloat(C)))
7437     return nullptr;
7438 
7439   if (C->isZero())
7440     return nullptr;
7441 
7442   // Get swapped predicate if necessary.
7443   if (C->isNegative())
7444     Pred = I.getSwappedPredicate();
7445 
7446   return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
7447 }
7448 
7449 /// Optimize fabs(X) compared with zero.
7450 static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
7451   Value *X;
7452   if (!match(I.getOperand(0), m_FAbs(m_Value(X))))
7453     return nullptr;
7454 
7455   const APFloat *C;
7456   if (!match(I.getOperand(1), m_APFloat(C)))
7457     return nullptr;
7458 
7459   if (!C->isPosZero()) {
7460     if (!C->isSmallestNormalized())
7461       return nullptr;
7462 
7463     const Function *F = I.getFunction();
7464     DenormalMode Mode = F->getDenormalMode(C->getSemantics());
7465     if (Mode.Input == DenormalMode::PreserveSign ||
7466         Mode.Input == DenormalMode::PositiveZero) {
7467 
7468       auto replaceFCmp = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
7469         Constant *Zero = ConstantFP::getZero(X->getType());
7470         return new FCmpInst(P, X, Zero, "", I);
7471       };
7472 
7473       switch (I.getPredicate()) {
7474       case FCmpInst::FCMP_OLT:
7475         // fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0
7476         return replaceFCmp(&I, FCmpInst::FCMP_OEQ, X);
7477       case FCmpInst::FCMP_UGE:
7478         // fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0
7479         return replaceFCmp(&I, FCmpInst::FCMP_UNE, X);
7480       case FCmpInst::FCMP_OGE:
7481         // fcmp oge fabs(x), smallest_normalized_number -> fcmp one x, 0.0
7482         return replaceFCmp(&I, FCmpInst::FCMP_ONE, X);
7483       case FCmpInst::FCMP_ULT:
7484         // fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0
7485         return replaceFCmp(&I, FCmpInst::FCMP_UEQ, X);
7486       default:
7487         break;
7488       }
7489     }
7490 
7491     return nullptr;
7492   }
7493 
7494   auto replacePredAndOp0 = [&IC](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
7495     I->setPredicate(P);
7496     return IC.replaceOperand(*I, 0, X);
7497   };
7498 
7499   switch (I.getPredicate()) {
7500   case FCmpInst::FCMP_UGE:
7501   case FCmpInst::FCMP_OLT:
7502     // fabs(X) >= 0.0 --> true
7503     // fabs(X) <  0.0 --> false
7504     llvm_unreachable("fcmp should have simplified");
7505 
7506   case FCmpInst::FCMP_OGT:
7507     // fabs(X) > 0.0 --> X != 0.0
7508     return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X);
7509 
7510   case FCmpInst::FCMP_UGT:
7511     // fabs(X) u> 0.0 --> X u!= 0.0
7512     return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X);
7513 
7514   case FCmpInst::FCMP_OLE:
7515     // fabs(X) <= 0.0 --> X == 0.0
7516     return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X);
7517 
7518   case FCmpInst::FCMP_ULE:
7519     // fabs(X) u<= 0.0 --> X u== 0.0
7520     return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X);
7521 
7522   case FCmpInst::FCMP_OGE:
7523     // fabs(X) >= 0.0 --> !isnan(X)
7524     assert(!I.hasNoNaNs() && "fcmp should have simplified");
7525     return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X);
7526 
7527   case FCmpInst::FCMP_ULT:
7528     // fabs(X) u< 0.0 --> isnan(X)
7529     assert(!I.hasNoNaNs() && "fcmp should have simplified");
7530     return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X);
7531 
7532   case FCmpInst::FCMP_OEQ:
7533   case FCmpInst::FCMP_UEQ:
7534   case FCmpInst::FCMP_ONE:
7535   case FCmpInst::FCMP_UNE:
7536   case FCmpInst::FCMP_ORD:
7537   case FCmpInst::FCMP_UNO:
7538     // Look through the fabs() because it doesn't change anything but the sign.
7539     // fabs(X) == 0.0 --> X == 0.0
7540     // fabs(X) != 0.0 --> X != 0.0
7541     // isnan(fabs(X)) --> isnan(X)
7542     // !isnan(fabs(X)) --> !isnan(X)
7543     return replacePredAndOp0(&I, I.getPredicate(), X);
7544 
7545   default:
7546     return nullptr;
7547   }
7548 }
7549 
7550 static Instruction *foldFCmpFNegCommonOp(FCmpInst &I) {
7551   CmpInst::Predicate Pred = I.getPredicate();
7552   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
7553 
7554   // Canonicalize fneg as Op1.
7555   if (match(Op0, m_FNeg(m_Value())) && !match(Op1, m_FNeg(m_Value()))) {
7556     std::swap(Op0, Op1);
7557     Pred = I.getSwappedPredicate();
7558   }
7559 
7560   if (!match(Op1, m_FNeg(m_Specific(Op0))))
7561     return nullptr;
7562 
7563   // Replace the negated operand with 0.0:
7564   // fcmp Pred Op0, -Op0 --> fcmp Pred Op0, 0.0
7565   Constant *Zero = ConstantFP::getZero(Op0->getType());
7566   return new FCmpInst(Pred, Op0, Zero, "", &I);
7567 }
7568 
7569 Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
7570   bool Changed = false;
7571 
7572   /// Orders the operands of the compare so that they are listed from most
7573   /// complex to least complex. Complexity increases from constants to unary
7574   /// operators to binary operators, so constants end up on the RHS.
7575   if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
7576     I.swapOperands();
7577     Changed = true;
7578   }
7579 
7580   const CmpInst::Predicate Pred = I.getPredicate();
7581   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
7582   if (Value *V = simplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
7583                                   SQ.getWithInstruction(&I)))
7584     return replaceInstUsesWith(I, V);
7585 
7586   // Simplify 'fcmp pred X, X'
7587   Type *OpType = Op0->getType();
7588   assert(OpType == Op1->getType() && "fcmp with different-typed operands?");
7589   if (Op0 == Op1) {
7590     switch (Pred) {
7591       default: break;
7592     case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
7593     case FCmpInst::FCMP_ULT:    // True if unordered or less than
7594     case FCmpInst::FCMP_UGT:    // True if unordered or greater than
7595     case FCmpInst::FCMP_UNE:    // True if unordered or not equal
7596       // Canonicalize these to be 'fcmp uno %X, 0.0'.
7597       I.setPredicate(FCmpInst::FCMP_UNO);
7598       I.setOperand(1, Constant::getNullValue(OpType));
7599       return &I;
7600 
7601     case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
7602     case FCmpInst::FCMP_OEQ:    // True if ordered and equal
7603     case FCmpInst::FCMP_OGE:    // True if ordered and greater than or equal
7604     case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
7605       // Canonicalize these to be 'fcmp ord %X, 0.0'.
7606       I.setPredicate(FCmpInst::FCMP_ORD);
7607       I.setOperand(1, Constant::getNullValue(OpType));
7608       return &I;
7609     }
7610   }
7611 
7612   // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
7613   // then canonicalize the operand to 0.0.
7614   if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
7615     if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, DL, &TLI, 0,
7616                                                       &AC, &I, &DT))
7617       return replaceOperand(I, 0, ConstantFP::getZero(OpType));
7618 
7619     if (!match(Op1, m_PosZeroFP()) &&
7620         isKnownNeverNaN(Op1, DL, &TLI, 0, &AC, &I, &DT))
7621       return replaceOperand(I, 1, ConstantFP::getZero(OpType));
7622   }
7623 
7624   // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
7625   Value *X, *Y;
7626   if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
7627     return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);

  if (Instruction *R = foldFCmpFNegCommonOp(I))
    return R;

  // Test if the FCmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  // and CodeGen. And in this case, at least one of the comparison
  // operands has at least one user besides the compare (the select),
  // which would often largely negate the benefit of folding anyway.
  if (I.hasOneUse())
    if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
      Value *A, *B;
      SelectPatternResult SPR = matchSelectPattern(SI, A, B);
      if (SPR.Flavor != SPF_UNKNOWN)
        return nullptr;
    }

  // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
  // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
  if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP()))
    return replaceOperand(I, 1, ConstantFP::getZero(OpType));

  // Ignore signbit of bitcasted int when comparing equality to FP 0.0:
  // fcmp oeq/une (bitcast X), 0.0 --> (and X, SignMaskC) ==/!= 0
  if (match(Op1, m_PosZeroFP()) &&
      match(Op0, m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType()->isVectorTy() == OpType->isVectorTy() &&
      X->getType()->getScalarSizeInBits() == OpType->getScalarSizeInBits()) {
    ICmpInst::Predicate IntPred = ICmpInst::BAD_ICMP_PREDICATE;
    if (Pred == FCmpInst::FCMP_OEQ)
      IntPred = ICmpInst::ICMP_EQ;
    else if (Pred == FCmpInst::FCMP_UNE)
      IntPred = ICmpInst::ICMP_NE;

    if (IntPred != ICmpInst::BAD_ICMP_PREDICATE) {
      Type *IntTy = X->getType();
      // Mask off the sign bit, then compare the remaining bits against zero.
      const APInt &SignMask = ~APInt::getSignMask(IntTy->getScalarSizeInBits());
      Value *MaskX = Builder.CreateAnd(X, ConstantInt::get(IntTy, SignMask));
      return new ICmpInst(IntPred, MaskX, ConstantInt::getNullValue(IntTy));
    }
  }
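
  // For example, with 32-bit scalars the fold above produces:
  //   %f = bitcast i32 %x to float
  //   %c = fcmp oeq float %f, 0.0
  //     --> %m = and i32 %x, 2147483647  ; 0x7FFFFFFF clears the sign bit
  //         %c = icmp eq i32 %m, 0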

  // Handle fcmp with instruction LHS and constant RHS.
  Instruction *LHSI;
  Constant *RHSC;
  if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) {
    switch (LHSI->getOpcode()) {
    case Instruction::PHI:
      if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
        return NV;
      break;
    case Instruction::SIToFP:
    case Instruction::UIToFP:
      if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
        return NV;
      break;
    case Instruction::FDiv:
      if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC))
        return NV;
      break;
    case Instruction::Load:
      if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
        if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
          if (Instruction *Res = foldCmpLoadFromIndexedGlobal(
                  cast<LoadInst>(LHSI), GEP, GV, I))
            return Res;
      break;
    }
  }

  if (Instruction *R = foldFabsWithFcmpZero(I, *this))
    return R;

  if (match(Op0, m_FNeg(m_Value(X)))) {
    // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
    Constant *C;
    if (match(Op1, m_Constant(C)))
      if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
        return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I);
  }
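
  // For example:
  //   %c = fcmp ogt (fneg %x), 4.0 --> %c = fcmp olt float %x, -4.0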

  if (match(Op0, m_FPExt(m_Value(X)))) {
    // fcmp (fpext X), (fpext Y) -> fcmp X, Y
    if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
      return new FCmpInst(Pred, X, Y, "", &I);

    const APFloat *C;
    if (match(Op1, m_APFloat(C))) {
      const fltSemantics &FPSem =
          X->getType()->getScalarType()->getFltSemantics();
      bool Lossy;
      APFloat TruncC = *C;
      TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);

      if (Lossy) {
        // X can't possibly equal the higher-precision constant, so reduce any
        // equality comparison.
        // TODO: Other predicates can be handled via getFCmpCode().
        switch (Pred) {
        case FCmpInst::FCMP_OEQ:
          // X is ordered and equal to an impossible constant --> false
          return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
        case FCmpInst::FCMP_ONE:
          // X is ordered and not equal to an impossible constant --> ordered
          return new FCmpInst(FCmpInst::FCMP_ORD, X,
                              ConstantFP::getZero(X->getType()));
        case FCmpInst::FCMP_UEQ:
          // X is unordered or equal to an impossible constant --> unordered
          return new FCmpInst(FCmpInst::FCMP_UNO, X,
                              ConstantFP::getZero(X->getType()));
        case FCmpInst::FCMP_UNE:
          // X is unordered or not equal to an impossible constant --> true
          return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
        default:
          break;
        }
      }

      // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
      // Avoid lossy conversions and denormals.
      // Zero is a special case that's OK to convert.
      APFloat Fabs = TruncC;
      Fabs.clearSign();
      if (!Lossy &&
          (Fabs.isZero() || !(Fabs < APFloat::getSmallestNormalized(FPSem)))) {
        Constant *NewC = ConstantFP::get(X->getType(), TruncC);
        return new FCmpInst(Pred, X, NewC, "", &I);
      }
    }
  }
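
  // For example, double 0.5 converts to float exactly, while double 0.1
  // does not:
  //   %e = fpext float %x to double
  //   %c = fcmp olt double %e, 0.5 --> %c = fcmp olt float %x, 0.5
  //   %c = fcmp oeq double %e, 0.1 --> false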

  // Convert a sign-bit test of an FP value into a cast and integer compare.
  // TODO: Simplify if the copysign constant is 0.0 or NaN.
  // TODO: Handle non-zero compare constants.
  // TODO: Handle other predicates.
  const APFloat *C;
  if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C),
                                                           m_Value(X)))) &&
      match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) {
    Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits());
    if (auto *VecTy = dyn_cast<VectorType>(OpType))
      IntType = VectorType::get(IntType, VecTy->getElementCount());

    // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0
    if (Pred == FCmpInst::FCMP_OLT) {
      Value *IntX = Builder.CreateBitCast(X, IntType);
      return new ICmpInst(ICmpInst::ICMP_SLT, IntX,
                          ConstantInt::getNullValue(IntType));
    }
  }
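
  // For example:
  //   %s = call float @llvm.copysign.f32(float 1.0, float %x)
  //   %c = fcmp olt float %s, 0.0
  //     --> %i = bitcast float %x to i32
  //         %c = icmp slt i32 %i, 0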

  {
    Value *CanonLHS = nullptr, *CanonRHS = nullptr;
    match(Op0, m_Intrinsic<Intrinsic::canonicalize>(m_Value(CanonLHS)));
    match(Op1, m_Intrinsic<Intrinsic::canonicalize>(m_Value(CanonRHS)));

    // (canonicalize(x) == x) => (x == x)
    if (CanonLHS == Op1)
      return new FCmpInst(Pred, Op1, Op1, "", &I);

    // (x == canonicalize(x)) => (x == x)
    if (CanonRHS == Op0)
      return new FCmpInst(Pred, Op0, Op0, "", &I);

    // (canonicalize(x) == canonicalize(y)) => (x == y)
    if (CanonLHS && CanonRHS)
      return new FCmpInst(Pred, CanonLHS, CanonRHS, "", &I);
  }
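
  // For example (the same holds for any predicate):
  //   %k = call float @llvm.canonicalize.f32(float %x)
  //   %c = fcmp oeq float %k, %x --> %c = fcmp oeq float %x, %x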

  if (I.getType()->isVectorTy())
    if (Instruction *Res = foldVectorCmp(I, Builder))
      return Res;

  return Changed ? &I : nullptr;
}
