xref: /llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp (revision 247a177cf78f0e9a51e351e630dd2a78c8f82859)
1 //===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass hoists and/or decomposes/recomposes integer division and remainder
10 // instructions to enable CFG improvements and better codegen.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/DivRemPairs.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/GlobalsModRef.h"
19 #include "llvm/Analysis/TargetTransformInfo.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/PatternMatch.h"
23 #include "llvm/InitializePasses.h"
24 #include "llvm/Pass.h"
25 #include "llvm/Support/DebugCounter.h"
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
28 
29 using namespace llvm;
30 using namespace llvm::PatternMatch;
31 
32 #define DEBUG_TYPE "div-rem-pairs"
33 STATISTIC(NumPairs, "Number of div/rem pairs");
34 STATISTIC(NumRecomposed, "Number of instructions recomposed");
35 STATISTIC(NumHoisted, "Number of instructions hoisted");
36 STATISTIC(NumDecomposed, "Number of instructions decomposed");
37 DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
38               "Controls transformations in div-rem-pairs pass");
39 
40 namespace {
41 struct ExpandedMatch {
42   DivRemMapKey Key;
43   Instruction *Value;
44 };
45 } // namespace
46 
47 /// See if we can match: (which is the form we expand into)
48 ///   X - ((X ?/ Y) * Y)
49 /// which is equivalent to:
50 ///   X ?% Y
51 static llvm::Optional<ExpandedMatch> matchExpandedRem(Instruction &I) {
52   Value *Dividend, *XroundedDownToMultipleOfY;
53   if (!match(&I, m_Sub(m_Value(Dividend), m_Value(XroundedDownToMultipleOfY))))
54     return llvm::None;
55 
56   Value *Divisor;
57   Instruction *Div;
58   // Look for  ((X / Y) * Y)
59   if (!match(
60           XroundedDownToMultipleOfY,
61           m_c_Mul(m_CombineAnd(m_IDiv(m_Specific(Dividend), m_Value(Divisor)),
62                                m_Instruction(Div)),
63                   m_Deferred(Divisor))))
64     return llvm::None;
65 
66   ExpandedMatch M;
67   M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv;
68   M.Key.Dividend = Dividend;
69   M.Key.Divisor = Divisor;
70   M.Value = &I;
71   return M;
72 }
73 
74 namespace {
75 /// A thin wrapper to store two values that we matched as div-rem pair.
76 /// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
77 struct DivRemPairWorklistEntry {
78   /// The actual udiv/sdiv instruction. Source of truth.
79   AssertingVH<Instruction> DivInst;
80 
81   /// The instruction that we have matched as a remainder instruction.
82   /// Should only be used as Value, don't introspect it.
83   AssertingVH<Instruction> RemInst;
84 
85   DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
86       : DivInst(DivInst_), RemInst(RemInst_) {
87     assert((DivInst->getOpcode() == Instruction::UDiv ||
88             DivInst->getOpcode() == Instruction::SDiv) &&
89            "Not a division.");
90     assert(DivInst->getType() == RemInst->getType() && "Types should match.");
91     // We can't check anything else about remainder instruction,
92     // it's not strictly required to be a urem/srem.
93   }
94 
95   /// The type for this pair, identical for both the div and rem.
96   Type *getType() const { return DivInst->getType(); }
97 
98   /// Is this pair signed or unsigned?
99   bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
100 
101   /// In this pair, what are the divident and divisor?
102   Value *getDividend() const { return DivInst->getOperand(0); }
103   Value *getDivisor() const { return DivInst->getOperand(1); }
104 
105   bool isRemExpanded() const {
106     switch (RemInst->getOpcode()) {
107     case Instruction::SRem:
108     case Instruction::URem:
109       return false; // single 'rem' instruction - unexpanded form.
110     default:
111       return true; // anything else means we have remainder in expanded form.
112     }
113   }
114 };
115 } // namespace
116 using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
117 
118 /// Find matching pairs of integer div/rem ops (they have the same numerator,
119 /// denominator, and signedness). Place those pairs into a worklist for further
120 /// processing. This indirection is needed because we have to use TrackingVH<>
121 /// because we will be doing RAUW, and if one of the rem instructions we change
122 /// happens to be an input to another div/rem in the maps, we'd have problems.
123 static DivRemWorklistTy getWorklist(Function &F) {
124   // Insert all divide and remainder instructions into maps keyed by their
125   // operands and opcode (signed or unsigned).
126   DenseMap<DivRemMapKey, Instruction *> DivMap;
127   // Use a MapVector for RemMap so that instructions are moved/inserted in a
128   // deterministic order.
129   MapVector<DivRemMapKey, Instruction *> RemMap;
130   for (auto &BB : F) {
131     for (auto &I : BB) {
132       if (I.getOpcode() == Instruction::SDiv)
133         DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
134       else if (I.getOpcode() == Instruction::UDiv)
135         DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
136       else if (I.getOpcode() == Instruction::SRem)
137         RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
138       else if (I.getOpcode() == Instruction::URem)
139         RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
140       else if (auto Match = matchExpandedRem(I))
141         RemMap[Match->Key] = Match->Value;
142     }
143   }
144 
145   // We'll accumulate the matching pairs of div-rem instructions here.
146   DivRemWorklistTy Worklist;
147 
148   // We can iterate over either map because we are only looking for matched
149   // pairs. Choose remainders for efficiency because they are usually even more
150   // rare than division.
151   for (auto &RemPair : RemMap) {
152     // Find the matching division instruction from the division map.
153     Instruction *DivInst = DivMap[RemPair.first];
154     if (!DivInst)
155       continue;
156 
157     // We have a matching pair of div/rem instructions.
158     NumPairs++;
159     Instruction *RemInst = RemPair.second;
160 
161     // Place it in the worklist.
162     Worklist.emplace_back(DivInst, RemInst);
163   }
164 
165   return Worklist;
166 }
167 
168 /// Find matching pairs of integer div/rem ops (they have the same numerator,
169 /// denominator, and signedness). If they exist in different basic blocks, bring
170 /// them together by hoisting or replace the common division operation that is
171 /// implicit in the remainder:
172 /// X % Y <--> X - ((X / Y) * Y).
173 ///
174 /// We can largely ignore the normal safety and cost constraints on speculation
175 /// of these ops when we find a matching pair. This is because we are already
176 /// guaranteed that any exceptions and most cost are already incurred by the
177 /// first member of the pair.
178 ///
179 /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
180 /// SimplifyCFG, but it's split off on its own because it's different enough
181 /// that it doesn't quite match the stated objectives of those passes.
182 static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
183                            const DominatorTree &DT) {
184   bool Changed = false;
185 
186   // Get the matching pairs of div-rem instructions. We want this extra
187   // indirection to avoid dealing with having to RAUW the keys of the maps.
188   DivRemWorklistTy Worklist = getWorklist(F);
189 
190   // Process each entry in the worklist.
191   for (DivRemPairWorklistEntry &E : Worklist) {
192     if (!DebugCounter::shouldExecute(DRPCounter))
193       continue;
194 
195     bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
196 
197     auto &DivInst = E.DivInst;
198     auto &RemInst = E.RemInst;
199 
200     const bool RemOriginallyWasInExpandedForm = E.isRemExpanded();
201     (void)RemOriginallyWasInExpandedForm; // suppress unused variable warning
202 
203     if (HasDivRemOp && E.isRemExpanded()) {
204       // The target supports div+rem but the rem is expanded.
205       // We should recompose it first.
206       Value *X = E.getDividend();
207       Value *Y = E.getDivisor();
208       Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(X, Y)
209                                           : BinaryOperator::CreateURem(X, Y);
210       // Note that we place it right next to the original expanded instruction,
211       // and letting further handling to move it if needed.
212       RealRem->setName(RemInst->getName() + ".recomposed");
213       RealRem->insertAfter(RemInst);
214       Instruction *OrigRemInst = RemInst;
215       // Update AssertingVH<> with new instruction so it doesn't assert.
216       RemInst = RealRem;
217       // And replace the original instruction with the new one.
218       OrigRemInst->replaceAllUsesWith(RealRem);
219       OrigRemInst->eraseFromParent();
220       NumRecomposed++;
221       // Note that we have left ((X / Y) * Y) around.
222       // If it had other uses we could rewrite it as X - X % Y
223     }
224 
225     assert((!E.isRemExpanded() || !HasDivRemOp) &&
226            "*If* the target supports div-rem, then by now the RemInst *is* "
227            "Instruction::[US]Rem.");
228 
229     // If the target supports div+rem and the instructions are in the same block
230     // already, there's nothing to do. The backend should handle this. If the
231     // target does not support div+rem, then we will decompose the rem.
232     if (HasDivRemOp && RemInst->getParent() == DivInst->getParent())
233       continue;
234 
235     bool DivDominates = DT.dominates(DivInst, RemInst);
236     if (!DivDominates && !DT.dominates(RemInst, DivInst)) {
237       // We have matching div-rem pair, but they are in two different blocks,
238       // neither of which dominates one another.
239       // FIXME: We could hoist both ops to the common predecessor block?
240       continue;
241     }
242 
243     // The target does not have a single div/rem operation,
244     // and the rem is already in expanded form. Nothing to do.
245     if (!HasDivRemOp && E.isRemExpanded())
246       continue;
247 
248     if (HasDivRemOp) {
249       // The target has a single div/rem operation. Hoist the lower instruction
250       // to make the matched pair visible to the backend.
251       if (DivDominates)
252         RemInst->moveAfter(DivInst);
253       else
254         DivInst->moveAfter(RemInst);
255       NumHoisted++;
256     } else {
257       // The target does not have a single div/rem operation,
258       // and the rem is *not* in a already-expanded form.
259       // Decompose the remainder calculation as:
260       // X % Y --> X - ((X / Y) * Y).
261 
262       assert(!RemOriginallyWasInExpandedForm &&
263              "We should not be expanding if the rem was in expanded form to "
264              "begin with.");
265 
266       Value *X = E.getDividend();
267       Value *Y = E.getDivisor();
268       Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
269       Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
270 
271       // If the remainder dominates, then hoist the division up to that block:
272       //
273       // bb1:
274       //   %rem = srem %x, %y
275       // bb2:
276       //   %div = sdiv %x, %y
277       // -->
278       // bb1:
279       //   %div = sdiv %x, %y
280       //   %mul = mul %div, %y
281       //   %rem = sub %x, %mul
282       //
283       // If the division dominates, it's already in the right place. The mul+sub
284       // will be in a different block because we don't assume that they are
285       // cheap to speculatively execute:
286       //
287       // bb1:
288       //   %div = sdiv %x, %y
289       // bb2:
290       //   %rem = srem %x, %y
291       // -->
292       // bb1:
293       //   %div = sdiv %x, %y
294       // bb2:
295       //   %mul = mul %div, %y
296       //   %rem = sub %x, %mul
297       //
298       // If the div and rem are in the same block, we do the same transform,
299       // but any code movement would be within the same block.
300 
301       if (!DivDominates)
302         DivInst->moveBefore(RemInst);
303       Mul->insertAfter(RemInst);
304       Sub->insertAfter(Mul);
305 
306       // Now kill the explicit remainder. We have replaced it with:
307       // (sub X, (mul (div X, Y), Y)
308       Sub->setName(RemInst->getName() + ".decomposed");
309       Instruction *OrigRemInst = RemInst;
310       // Update AssertingVH<> with new instruction so it doesn't assert.
311       RemInst = Sub;
312       // And replace the original instruction with the new one.
313       OrigRemInst->replaceAllUsesWith(Sub);
314       OrigRemInst->eraseFromParent();
315       NumDecomposed++;
316     }
317     Changed = true;
318   }
319 
320   return Changed;
321 }
322 
323 // Pass manager boilerplate below here.
324 
325 namespace {
326 struct DivRemPairsLegacyPass : public FunctionPass {
327   static char ID;
328   DivRemPairsLegacyPass() : FunctionPass(ID) {
329     initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry());
330   }
331 
332   void getAnalysisUsage(AnalysisUsage &AU) const override {
333     AU.addRequired<DominatorTreeWrapperPass>();
334     AU.addRequired<TargetTransformInfoWrapperPass>();
335     AU.setPreservesCFG();
336     AU.addPreserved<DominatorTreeWrapperPass>();
337     AU.addPreserved<GlobalsAAWrapperPass>();
338     FunctionPass::getAnalysisUsage(AU);
339   }
340 
341   bool runOnFunction(Function &F) override {
342     if (skipFunction(F))
343       return false;
344     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
345     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
346     return optimizeDivRem(F, TTI, DT);
347   }
348 };
349 } // namespace
350 
351 char DivRemPairsLegacyPass::ID = 0;
352 INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
353                       "Hoist/decompose integer division and remainder", false,
354                       false)
355 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
356 INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs",
357                     "Hoist/decompose integer division and remainder", false,
358                     false)
359 FunctionPass *llvm::createDivRemPairsPass() {
360   return new DivRemPairsLegacyPass();
361 }
362 
363 PreservedAnalyses DivRemPairsPass::run(Function &F,
364                                        FunctionAnalysisManager &FAM) {
365   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
366   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
367   if (!optimizeDivRem(F, TTI, DT))
368     return PreservedAnalyses::all();
369   // TODO: This pass just hoists/replaces math ops - all analyses are preserved?
370   PreservedAnalyses PA;
371   PA.preserveSet<CFGAnalyses>();
372   PA.preserve<GlobalsAA>();
373   return PA;
374 }
375