xref: /llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp (revision aa999528966781ae84e508b5a29cc4be7ea0368f)
1 //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Insert hardware loop intrinsics into loops which are deemed profitable by
10 /// the target, by querying TargetTransformInfo. A hardware loop comprises of
11 /// two intrinsics: one, outside the loop, to set the loop iteration count and
12 /// another, in the exit block, to decrement the counter. The decremented value
13 /// can either be carried through the loop via a phi or handled in some opaque
14 /// way by the target.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Pass.h"
19 #include "llvm/PassRegistry.h"
20 #include "llvm/PassSupport.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/ScalarEvolution.h"
25 #include "llvm/Analysis/ScalarEvolutionExpander.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/CodeGen/TargetPassConfig.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Transforms/Scalar.h"
39 #include "llvm/Transforms/Utils.h"
40 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
41 #include "llvm/Transforms/Utils/Local.h"
42 #include "llvm/Transforms/Utils/LoopUtils.h"
43 
44 #define DEBUG_TYPE "hardware-loops"
45 
46 #define HW_LOOPS_NAME "Hardware Loop Insertion"
47 
48 using namespace llvm;
49 
50 static cl::opt<bool>
51 ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
52                    cl::desc("Force hardware loops intrinsics to be inserted"));
53 
54 static cl::opt<bool>
55 ForceHardwareLoopPHI(
56   "force-hardware-loop-phi", cl::Hidden, cl::init(false),
57   cl::desc("Force hardware loop counter to be updated through a phi"));
58 
59 static cl::opt<bool>
60 ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
61                 cl::desc("Force allowance of nested hardware loops"));
62 
63 static cl::opt<unsigned>
64 LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
65             cl::desc("Set the loop decrement value"));
66 
67 static cl::opt<unsigned>
68 CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
69                 cl::desc("Set the loop counter bitwidth"));
70 
71 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
72 
73 namespace {
74 
75   using TTI = TargetTransformInfo;
76 
77   class HardwareLoops : public FunctionPass {
78   public:
79     static char ID;
80 
81     HardwareLoops() : FunctionPass(ID) {
82       initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
83     }
84 
85     bool runOnFunction(Function &F) override;
86 
87     void getAnalysisUsage(AnalysisUsage &AU) const override {
88       AU.addRequired<LoopInfoWrapperPass>();
89       AU.addPreserved<LoopInfoWrapperPass>();
90       AU.addRequired<DominatorTreeWrapperPass>();
91       AU.addPreserved<DominatorTreeWrapperPass>();
92       AU.addRequired<ScalarEvolutionWrapperPass>();
93       AU.addRequired<AssumptionCacheTracker>();
94       AU.addRequired<TargetTransformInfoWrapperPass>();
95     }
96 
97     // Try to convert the given Loop into a hardware loop.
98     bool TryConvertLoop(Loop *L);
99 
100     // Given that the target believes the loop to be profitable, try to
101     // convert it.
102     bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
103 
104   private:
105     ScalarEvolution *SE = nullptr;
106     LoopInfo *LI = nullptr;
107     const DataLayout *DL = nullptr;
108     const TargetTransformInfo *TTI = nullptr;
109     DominatorTree *DT = nullptr;
110     bool PreserveLCSSA = false;
111     AssumptionCache *AC = nullptr;
112     TargetLibraryInfo *LibInfo = nullptr;
113     Module *M = nullptr;
114     bool MadeChange = false;
115   };
116 
117   class HardwareLoop {
118     // Expand the trip count scev into a value that we can use.
119     Value *InitLoopCount(BasicBlock *BB);
120 
121     // Insert the set_loop_iteration intrinsic.
122     void InsertIterationSetup(Value *LoopCountInit, BasicBlock *BB);
123 
124     // Insert the loop_decrement intrinsic.
125     void InsertLoopDec();
126 
127     // Insert the loop_decrement_reg intrinsic.
128     Instruction *InsertLoopRegDec(Value *EltsRem);
129 
130     // If the target requires the counter value to be updated in the loop,
131     // insert a phi to hold the value. The intended purpose is for use by
132     // loop_decrement_reg.
133     PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
134 
135     // Create a new cmp, that checks the returned value of loop_decrement*,
136     // and update the exit branch to use it.
137     void UpdateBranch(Value *EltsRem);
138 
139   public:
140     HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
141                  const DataLayout &DL) :
142       SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
143       ExitCount(Info.ExitCount),
144       CountType(Info.CountType),
145       ExitBranch(Info.ExitBranch),
146       LoopDecrement(Info.LoopDecrement),
147       UsePHICounter(Info.CounterInReg) { }
148 
149     void Create();
150 
151   private:
152     ScalarEvolution &SE;
153     const DataLayout &DL;
154     Loop *L                 = nullptr;
155     Module *M               = nullptr;
156     const SCEV *ExitCount   = nullptr;
157     Type *CountType         = nullptr;
158     BranchInst *ExitBranch  = nullptr;
159     Value *LoopDecrement      = nullptr;
160     bool UsePHICounter      = false;
161   };
162 }
163 
164 char HardwareLoops::ID = 0;
165 
166 bool HardwareLoops::runOnFunction(Function &F) {
167   if (skipFunction(F))
168     return false;
169 
170   LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
171 
172   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
173   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
174   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
175   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
176   DL = &F.getParent()->getDataLayout();
177   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
178   LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
179   PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
180   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
181   M = F.getParent();
182 
183   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
184     Loop *L = *I;
185     if (!L->getParentLoop())
186       TryConvertLoop(L);
187   }
188 
189   return MadeChange;
190 }
191 
192 // Return true if the search should stop, which will be when an inner loop is
193 // converted and the parent loop doesn't support containing a hardware loop.
194 bool HardwareLoops::TryConvertLoop(Loop *L) {
195   // Process nested loops first.
196   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
197     if (TryConvertLoop(*I))
198       return true; // Stop search.
199 
200   HardwareLoopInfo HWLoopInfo(L);
201   if (!HWLoopInfo.canAnalyze(*LI))
202     return false;
203 
204   if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
205       ForceHardwareLoops) {
206 
207     // Allow overriding of the counter width and loop decrement value.
208     if (CounterBitWidth.getNumOccurrences())
209       HWLoopInfo.CountType =
210         IntegerType::get(M->getContext(), CounterBitWidth);
211 
212     if (LoopDecrement.getNumOccurrences())
213       HWLoopInfo.LoopDecrement =
214         ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
215 
216     MadeChange |= TryConvertLoop(HWLoopInfo);
217     return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
218   }
219 
220   return false;
221 }
222 
223 bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
224 
225   Loop *L = HWLoopInfo.L;
226   LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
227 
228   if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
229                                           ForceHardwareLoopPHI))
230     return false;
231 
232   assert(
233       (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
234       "Hardware Loop must have set exit info.");
235 
236   BasicBlock *Preheader = L->getLoopPreheader();
237 
238   // If we don't have a preheader, then insert one.
239   if (!Preheader)
240     Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
241   if (!Preheader)
242     return false;
243 
244   HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
245   HWLoop.Create();
246   ++NumHWLoops;
247   return true;
248 }
249 
250 void HardwareLoop::Create() {
251   LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
252   BasicBlock *BeginBB = L->getLoopPreheader();
253   Value *LoopCountInit = InitLoopCount(BeginBB);
254   if (!LoopCountInit)
255     return;
256 
257   InsertIterationSetup(LoopCountInit, BeginBB);
258 
259   if (UsePHICounter || ForceHardwareLoopPHI) {
260     Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
261     Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
262     LoopDec->setOperand(0, EltsRem);
263     UpdateBranch(LoopDec);
264   } else
265     InsertLoopDec();
266 
267   // Run through the basic blocks of the loop and see if any of them have dead
268   // PHIs that can be removed.
269   for (auto I : L->blocks())
270     DeleteDeadPHIs(I);
271 }
272 
273 Value *HardwareLoop::InitLoopCount(BasicBlock *BB) {
274   SCEVExpander SCEVE(SE, DL, "loopcnt");
275   if (!ExitCount->getType()->isPointerTy() &&
276       ExitCount->getType() != CountType)
277     ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
278 
279   ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
280 
281   if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
282     LLVM_DEBUG(dbgs() << "HWLoops: Bailing, unsafe to expand ExitCount "
283                << *ExitCount << "\n");
284     return nullptr;
285   }
286 
287   Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
288                                      BB->getTerminator());
289   LLVM_DEBUG(dbgs() << "HWLoops: Loop Count: " << *Count << "\n");
290   return Count;
291 }
292 
293 void HardwareLoop::InsertIterationSetup(Value *LoopCountInit,
294                                         BasicBlock *BB) {
295   IRBuilder<> Builder(BB->getTerminator());
296   Type *Ty = LoopCountInit->getType();
297   Function *LoopIter =
298     Intrinsic::getDeclaration(M, Intrinsic::set_loop_iterations, Ty);
299   Builder.CreateCall(LoopIter, LoopCountInit);
300 }
301 
302 void HardwareLoop::InsertLoopDec() {
303   IRBuilder<> CondBuilder(ExitBranch);
304 
305   Function *DecFunc =
306     Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
307                               LoopDecrement->getType());
308   Value *Ops[] = { LoopDecrement };
309   Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
310   Value *OldCond = ExitBranch->getCondition();
311   ExitBranch->setCondition(NewCond);
312 
313   // The false branch must exit the loop.
314   if (!L->contains(ExitBranch->getSuccessor(0)))
315     ExitBranch->swapSuccessors();
316 
317   // The old condition may be dead now, and may have even created a dead PHI
318   // (the original induction variable).
319   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
320 
321   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
322 }
323 
324 Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
325   IRBuilder<> CondBuilder(ExitBranch);
326 
327   Function *DecFunc =
328       Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
329                                 { EltsRem->getType(), EltsRem->getType(),
330                                   LoopDecrement->getType()
331                                 });
332   Value *Ops[] = { EltsRem, LoopDecrement };
333   Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
334 
335   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
336   return cast<Instruction>(Call);
337 }
338 
339 PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
340   BasicBlock *Preheader = L->getLoopPreheader();
341   BasicBlock *Header = L->getHeader();
342   BasicBlock *Latch = ExitBranch->getParent();
343   IRBuilder<> Builder(Header->getFirstNonPHI());
344   PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
345   Index->addIncoming(NumElts, Preheader);
346   Index->addIncoming(EltsRem, Latch);
347   LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
348   return Index;
349 }
350 
351 void HardwareLoop::UpdateBranch(Value *EltsRem) {
352   IRBuilder<> CondBuilder(ExitBranch);
353   Value *NewCond =
354     CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
355   Value *OldCond = ExitBranch->getCondition();
356   ExitBranch->setCondition(NewCond);
357 
358   // The false branch must exit the loop.
359   if (!L->contains(ExitBranch->getSuccessor(0)))
360     ExitBranch->swapSuccessors();
361 
362   // The old condition may be dead now, and may have even created a dead PHI
363   // (the original induction variable).
364   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
365 }
366 
367 INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
368 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
369 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
370 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
371 INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
372 
373 FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
374