//===- InstCombiner.h - InstCombine implementation --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file provides the interface for the instcombine pass implementation.
/// The interface is used for generic transformations in this folder and
/// target-specific combines in the targets.
/// The visitor implementation is in \c InstCombinerImpl in
/// \c InstCombineInternal.h.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H
#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H

#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

namespace llvm {

class AAResults;
class AssumptionCache;
class OptimizationRemarkEmitter;
class ProfileSummaryInfo;
class TargetLibraryInfo;
class TargetTransformInfo;

/// The core instruction combiner logic.
///
/// This class provides the logic to recursively visit instructions and
/// combine them.
class LLVM_LIBRARY_VISIBILITY InstCombiner {
  /// Only used to call target-specific intrinsic combining.
  /// It must **NOT** be used for any other purpose, as InstCombine is a
  /// target-independent canonicalization transform.
  TargetTransformInfo &TTIForTargetIntrinsicsOnly;

public:
  /// Maximum size of array considered when transforming.
  uint64_t MaxArraySizeForCombine = 0;

  /// An IRBuilder that automatically inserts new instructions into the
  /// worklist.
  using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
  BuilderTy &Builder;

protected:
  /// A worklist of the instructions that need to be simplified.
  InstructionWorklist &Worklist;

  // Mode in which we are running the combiner.
  const bool MinimizeSize;

  AAResults *AA;

  // Required analyses.
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  DominatorTree &DT;
  const DataLayout &DL;
  SimplifyQuery SQ;
  OptimizationRemarkEmitter &ORE;
  BlockFrequencyInfo *BFI;
  BranchProbabilityInfo *BPI;
  ProfileSummaryInfo *PSI;
  DomConditionCache DC;

  ReversePostOrderTraversal<BasicBlock *> &RPOT;

  bool MadeIRChange = false;

  /// Edges that are known to never be taken.
  SmallDenseSet<std::pair<BasicBlock *, BasicBlock *>, 8> DeadEdges;

  /// Order of predecessors to canonicalize phi nodes towards.
  SmallDenseMap<BasicBlock *, SmallVector<BasicBlock *>, 8> PredOrder;

  /// Backedges, used to avoid pushing instructions across backedges in cases
  /// where this may result in infinite combine loops. For irreducible loops
  /// this picks an arbitrary backedge.
  SmallDenseSet<std::pair<const BasicBlock *, const BasicBlock *>, 8> BackEdges;
  bool ComputedBackEdges = false;

public:
  InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder,
               bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
               TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
               DominatorTree &DT, OptimizationRemarkEmitter &ORE,
               BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI,
               ProfileSummaryInfo *PSI, const DataLayout &DL,
               ReversePostOrderTraversal<BasicBlock *> &RPOT)
      : TTIForTargetIntrinsicsOnly(TTI), Builder(Builder), Worklist(Worklist),
        MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL),
        SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true,
           /*CanUseUndef*/ true, &DC),
        ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), RPOT(RPOT) {}

  virtual ~InstCombiner() = default;

  /// Return the source operand of a potentially bitcasted value while
  /// optionally checking if it has one use. If there is no bitcast or the one
  /// use check is not met, return the input value itself.
  static Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) {
    if (auto *BitCast = dyn_cast<BitCastInst>(V))
      if (!OneUseOnly || BitCast->hasOneUse())
        return BitCast->getOperand(0);

    // V is not a bitcast or V has more than one use and OneUseOnly is true.
    return V;
  }
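
  // Illustrative example (hypothetical IR): given
  //   %bc = bitcast <4 x i32> %v to <2 x i64>
  // peekThroughBitcast(%bc) returns %v, while
  // peekThroughBitcast(%bc, /*OneUseOnly=*/true) returns %bc itself when %bc
  // has additional users.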

  /// Assign a complexity or rank value to LLVM Values. This is used to reduce
  /// the amount of pattern matching needed for compares and commutative
  /// instructions. For example, if we have:
  ///   icmp ugt X, Constant
  /// or
  ///   xor (add X, Constant), cast Z
  ///
  /// We do not have to consider the commuted variants of these patterns
  /// because canonicalization based on complexity guarantees the above
  /// ordering.
  ///
  /// This routine maps IR values to various complexity ranks:
  ///   0 -> undef
  ///   1 -> Constants
  ///   2 -> Cast and (f)neg/not instructions
  ///   3 -> Other instructions and arguments
  static unsigned getComplexity(Value *V) {
    if (isa<Constant>(V))
      return isa<UndefValue>(V) ? 0 : 1;

    if (isa<CastInst>(V) || match(V, m_Neg(PatternMatch::m_Value())) ||
        match(V, m_Not(PatternMatch::m_Value())) ||
        match(V, m_FNeg(PatternMatch::m_Value())))
      return 2;

    return 3;
  }
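
  // Illustrative example (hypothetical IR): for a commutative operation such
  // as 'add i32 7, %x', the operand complexities (constant = 1, argument = 3)
  // drive canonicalization to 'add i32 %x, 7', so later folds only need to
  // match the constant on the RHS.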

  /// Predicate canonicalization reduces the number of patterns that need to be
  /// matched by other transforms. For example, we may swap the operands of a
  /// conditional branch or select to create a compare with a canonical
  /// (inverted) predicate which is then more likely to be matched with other
  /// values.
  static bool isCanonicalPredicate(CmpPredicate Pred) {
    switch (Pred) {
    case CmpInst::ICMP_NE:
    case CmpInst::ICMP_ULE:
    case CmpInst::ICMP_SLE:
    case CmpInst::ICMP_UGE:
    case CmpInst::ICMP_SGE:
    // TODO: There are 16 FCMP predicates. Should others be (not) canonical?
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_OLE:
    case CmpInst::FCMP_OGE:
      return false;
    default:
      return true;
    }
  }
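
  // Illustrative example (hypothetical IR): 'select (icmp sge %x, %y), %a, %b'
  // can be rewritten as 'select (icmp slt %x, %y), %b, %a', replacing the
  // non-canonical SGE predicate with its canonical inverse SLT by swapping the
  // select arms.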

  /// Add one to a Constant.
  static Constant *AddOne(Constant *C) {
    return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
  }

  /// Subtract one from a Constant.
  static Constant *SubOne(Constant *C) {
    return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
  }

  static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI) {
    // a ? b : false and a ? true : b are the canonical form of logical and/or.
    // This includes !a ? b : false and !a ? true : b. Absorbing the not into
    // the select by swapping operands would break recognition of this pattern
    // in other analyses, so don't do that.
    return match(&SI, PatternMatch::m_LogicalAnd(PatternMatch::m_Value(),
                                                 PatternMatch::m_Value())) ||
           match(&SI, PatternMatch::m_LogicalOr(PatternMatch::m_Value(),
                                                PatternMatch::m_Value()));
  }
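
  // Illustrative example (hypothetical IR): 'select i1 %a, i1 %b, i1 false' is
  // the canonical spelling of a logical 'and %a, %b', and
  // 'select i1 %a, i1 true, i1 %b' of a logical 'or %a, %b'; this helper keeps
  // those select forms intact.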

  /// Return nonnull value if V is free to invert under the condition of
  /// WillInvertAllUses.
  /// If Builder is nonnull, it will return a simplified ~V.
  /// If Builder is null, it will return an arbitrary nonnull value (not
  /// dereferenceable).
  /// If the inversion will consume instructions, `DoesConsume` will be set to
  /// true. Otherwise it will be false.
  Value *getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
                               BuilderTy *Builder, bool &DoesConsume,
                               unsigned Depth);

  Value *getFreelyInverted(Value *V, bool WillInvertAllUses,
                           BuilderTy *Builder, bool &DoesConsume) {
    DoesConsume = false;
    return getFreelyInvertedImpl(V, WillInvertAllUses, Builder, DoesConsume,
                                 /*Depth*/ 0);
  }

  Value *getFreelyInverted(Value *V, bool WillInvertAllUses,
                           BuilderTy *Builder) {
    bool Unused;
    return getFreelyInverted(V, WillInvertAllUses, Builder, Unused);
  }

  /// Return true if the specified value is free to invert (apply ~ to).
  /// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
  /// is true, work under the assumption that the caller intends to remove all
  /// uses of V and only keep uses of ~V.
  ///
  /// See also: canFreelyInvertAllUsersOf()
  bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume) {
    return getFreelyInverted(V, WillInvertAllUses, /*Builder*/ nullptr,
                             DoesConsume) != nullptr;
  }

  bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
    bool Unused;
    return isFreeToInvert(V, WillInvertAllUses, Unused);
  }
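
  // Illustrative examples (hypothetical IR): an 'icmp' is typically free to
  // invert (flip its predicate), 'xor %x, -1' is free to invert (the xor is
  // simply dropped), and constants fold to their complement; values without
  // such structure are generally not free to invert.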

  /// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
  /// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn.
  /// NOTE: for Instructions only!
  ///
  /// See also: isFreeToInvert()
  bool canFreelyInvertAllUsersOf(Instruction *V, Value *IgnoredUser) {
    // Look at every user of V.
    for (Use &U : V->uses()) {
      if (U.getUser() == IgnoredUser)
        continue; // Don't consider this user.

      auto *I = cast<Instruction>(U.getUser());
      switch (I->getOpcode()) {
      case Instruction::Select:
        if (U.getOperandNo() != 0) // Only if the value is used as select cond.
          return false;
        if (shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(I)))
          return false;
        break;
      case Instruction::Br:
        assert(U.getOperandNo() == 0 && "Must be branching on that value.");
        break; // Free to invert by swapping true/false values/destinations.
      case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring
                             // it.
        if (!match(I, m_Not(PatternMatch::m_Value())))
          return false; // Not a 'not'.
        break;
      default:
        return false; // Don't know, likely not freely invertible.
      }
      // So far all users were free to invert...
    }
    return true; // Can freely invert all users!
  }
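
  // Illustrative example (hypothetical i1 %c): if every user is a conditional
  // branch on %c, a select using %c only as its condition (and not in logical
  // and/or form), or a 'xor %c, true', then %c can be replaced by its inverse
  // by swapping branch destinations / select arms and dropping the 'not'.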

  /// Some binary operators require special handling to avoid poison and
  /// undefined behavior. If a constant vector has undef elements, replace
  /// those undefs with identity constants if possible because those are always
  /// safe to execute. If no identity constant exists, replace undef with some
  /// other safe constant.
  static Constant *
  getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In,
                                bool IsRHSConstant) {
    auto *InVTy = cast<FixedVectorType>(In->getType());

    Type *EltTy = InVTy->getElementType();
    auto *SafeC = ConstantExpr::getBinOpIdentity(Opcode, EltTy, IsRHSConstant);
    if (!SafeC) {
      // TODO: Should this be available as a constant utility function? It is
      // similar to getBinOpAbsorber().
      if (IsRHSConstant) {
        switch (Opcode) {
        case Instruction::SRem: // X % 1 = 0
        case Instruction::URem: // X %u 1 = 0
          SafeC = ConstantInt::get(EltTy, 1);
          break;
        case Instruction::FRem: // X % 1.0 (doesn't simplify, but it is safe)
          SafeC = ConstantFP::get(EltTy, 1.0);
          break;
        default:
          llvm_unreachable(
              "Only rem opcodes have no identity constant for RHS");
        }
      } else {
        switch (Opcode) {
        case Instruction::Shl:  // 0 << X = 0
        case Instruction::LShr: // 0 >>u X = 0
        case Instruction::AShr: // 0 >> X = 0
        case Instruction::SDiv: // 0 / X = 0
        case Instruction::UDiv: // 0 /u X = 0
        case Instruction::SRem: // 0 % X = 0
        case Instruction::URem: // 0 %u X = 0
        case Instruction::Sub:  // 0 - X (doesn't simplify, but it is safe)
        case Instruction::FSub: // 0.0 - X (doesn't simplify, but it is safe)
        case Instruction::FDiv: // 0.0 / X (doesn't simplify, but it is safe)
        case Instruction::FRem: // 0.0 % X = 0
          SafeC = Constant::getNullValue(EltTy);
          break;
        default:
          llvm_unreachable("Expected to find identity constant for opcode");
        }
      }
    }
    assert(SafeC && "Must have safe constant for binop");
    unsigned NumElts = InVTy->getNumElements();
    SmallVector<Constant *, 16> Out(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      Constant *C = In->getAggregateElement(i);
      Out[i] = isa<UndefValue>(C) ? SafeC : C;
    }
    return ConstantVector::get(Out);
  }
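
  // Illustrative example (hypothetical IR): for 'udiv %x, <i32 7, i32 undef>'
  // the undef lane can be replaced with the safe divisor 1, yielding
  // '<i32 7, i32 1>', which avoids a per-lane division by undef.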

  void addToWorklist(Instruction *I) { Worklist.push(I); }

  AssumptionCache &getAssumptionCache() const { return AC; }
  TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; }
  DominatorTree &getDominatorTree() const { return DT; }
  const DataLayout &getDataLayout() const { return DL; }
  const SimplifyQuery &getSimplifyQuery() const { return SQ; }
  OptimizationRemarkEmitter &getOptimizationRemarkEmitter() const {
    return ORE;
  }
  BlockFrequencyInfo *getBlockFrequencyInfo() const { return BFI; }
  ProfileSummaryInfo *getProfileSummaryInfo() const { return PSI; }

  // Call target-specific combiners.
  std::optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II);
  std::optional<Value *>
  targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask,
                                         KnownBits &Known,
                                         bool &KnownBitsComputed);
  std::optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic(
      IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp);

  void computeBackEdges();
  bool isBackEdge(const BasicBlock *From, const BasicBlock *To) {
    if (!ComputedBackEdges)
      computeBackEdges();
    return BackEdges.contains({From, To});
  }

  /// Inserts an instruction \p New before instruction \p Old.
  ///
  /// Also adds the new instruction to the worklist and returns \p New so that
  /// it is suitable for use as the return from the visitation patterns.
  Instruction *InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old) {
    assert(New && !New->getParent() &&
           "New instruction already inserted into a basic block!");
    New->insertBefore(Old); // Insert inst
    Worklist.add(New);
    return New;
  }

  /// Same as InsertNewInstBefore, but also sets the debug loc.
  Instruction *InsertNewInstWith(Instruction *New, BasicBlock::iterator Old) {
    New->setDebugLoc(Old->getDebugLoc());
    return InsertNewInstBefore(New, Old);
  }
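
  // Hypothetical usage sketch from a visit method (names are illustrative
  // only): build a replacement instruction, insert it before the instruction
  // being combined, and let the worklist pick it up:
  //   Value *X, *Y; // operands already extracted by the caller
  //   Instruction *Shl = InsertNewInstWith(
  //       BinaryOperator::CreateShl(X, Y, "shl"), I.getIterator());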

  /// A combiner-aware RAUW-like routine.
  ///
  /// This method is to be used when an instruction is found to be dead,
  /// replaceable with another preexisting expression. Here we add all uses of
  /// I to the worklist, replace all uses of I with the new value, then return
  /// I, so that the inst combiner will know that I was modified.
  Instruction *replaceInstUsesWith(Instruction &I, Value *V) {
    // If there are no uses to replace, then we return nullptr to indicate that
    // no changes were made to the program.
    if (I.use_empty()) return nullptr;

    Worklist.pushUsersToWorkList(I); // Add all modified instrs to worklist.

    // If we are replacing the instruction with itself, this must be in a
    // segment of unreachable code, so just clobber the instruction.
    if (&I == V)
      V = PoisonValue::get(I.getType());

    LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n"
                      << "    with " << *V << '\n');

    // If V is a new unnamed instruction, take the name from the old one.
    if (V->use_empty() && isa<Instruction>(V) && !V->hasName() && I.hasName())
      V->takeName(&I);

    I.replaceAllUsesWith(V);
    return &I;
  }
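
  // Hypothetical usage sketch: a visit method that proves 'I' computes the
  // same value as an existing 'Simplified' value would typically end with
  //   return replaceInstUsesWith(I, Simplified);
  // so the driver knows 'I' was changed and can erase it.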

  /// Replace operand of instruction and add old operand to the worklist.
  Instruction *replaceOperand(Instruction &I, unsigned OpNum, Value *V) {
    Value *OldOp = I.getOperand(OpNum);
    I.setOperand(OpNum, V);
    Worklist.handleUseCountDecrement(OldOp);
    return &I;
  }
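
  // Hypothetical usage sketch: rewriting a single operand in place, e.g.
  //   return replaceOperand(I, 1, ConstantInt::get(I.getType(), 0));
  // returns 'I' itself so the visitor reports the instruction as modified.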

  /// Replace use and add the previously used value to the worklist.
  void replaceUse(Use &U, Value *NewValue) {
    Value *OldOp = U;
    U = NewValue;
    Worklist.handleUseCountDecrement(OldOp);
  }

  /// Combiner aware instruction erasure.
  ///
  /// When dealing with an instruction that has side effects or produces a void
  /// value, we can't rely on DCE to delete the instruction. Instead, visit
  /// methods should return the value returned by this function.
  virtual Instruction *eraseInstFromFunction(Instruction &I) = 0;

  void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                        const Instruction *CxtI) const {
    llvm::computeKnownBits(V, Known, Depth, SQ.getWithInstruction(CxtI));
  }

  KnownBits computeKnownBits(const Value *V, unsigned Depth,
                             const Instruction *CxtI) const {
    return llvm::computeKnownBits(V, Depth, SQ.getWithInstruction(CxtI));
  }
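
  // Hypothetical usage sketch: querying known bits of an operand at the point
  // of the instruction being visited, e.g.
  //   KnownBits Known = computeKnownBits(Op, /*Depth=*/0, &I);
  //   if (Known.isNonNegative()) { /* ... */ }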

  bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false,
                              unsigned Depth = 0,
                              const Instruction *CxtI = nullptr) {
    return llvm::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
                                        SQ.getWithInstruction(CxtI));
  }

  bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
                         const Instruction *CxtI = nullptr) const {
    return llvm::MaskedValueIsZero(V, Mask, SQ.getWithInstruction(CxtI), Depth);
  }

  unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0,
                              const Instruction *CxtI = nullptr) const {
    return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
  }

  unsigned ComputeMaxSignificantBits(const Value *Op, unsigned Depth = 0,
                                     const Instruction *CxtI = nullptr) const {
    return llvm::ComputeMaxSignificantBits(Op, DL, Depth, &AC, CxtI, &DT);
  }

  OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
                                               const Value *RHS,
                                               const Instruction *CxtI,
                                               bool IsNSW = false) const {
    return llvm::computeOverflowForUnsignedMul(
        LHS, RHS, SQ.getWithInstruction(CxtI), IsNSW);
  }

  OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
                                             const Instruction *CxtI) const {
    return llvm::computeOverflowForSignedMul(LHS, RHS,
                                             SQ.getWithInstruction(CxtI));
  }

  OverflowResult
  computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
                                const WithCache<const Value *> &RHS,
                                const Instruction *CxtI) const {
    return llvm::computeOverflowForUnsignedAdd(LHS, RHS,
                                               SQ.getWithInstruction(CxtI));
  }

  OverflowResult
  computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
                              const WithCache<const Value *> &RHS,
                              const Instruction *CxtI) const {
    return llvm::computeOverflowForSignedAdd(LHS, RHS,
                                             SQ.getWithInstruction(CxtI));
  }

  OverflowResult computeOverflowForUnsignedSub(const Value *LHS,
                                               const Value *RHS,
                                               const Instruction *CxtI) const {
    return llvm::computeOverflowForUnsignedSub(LHS, RHS,
                                               SQ.getWithInstruction(CxtI));
  }

  OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS,
                                             const Instruction *CxtI) const {
    return llvm::computeOverflowForSignedSub(LHS, RHS,
                                             SQ.getWithInstruction(CxtI));
  }

  virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                    const APInt &DemandedMask, KnownBits &Known,
                                    unsigned Depth, const SimplifyQuery &Q) = 0;

  bool SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                            const APInt &DemandedMask, KnownBits &Known) {
    return SimplifyDemandedBits(I, OpNo, DemandedMask, Known,
                                /*Depth=*/0, SQ.getWithInstruction(I));
  }

  virtual Value *
  SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts,
                             unsigned Depth = 0,
                             bool AllowMultipleUsers = false) = 0;

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
};

} // namespace llvm

#undef DEBUG_TYPE

#endif