xref: /llvm-project/polly/lib/CodeGen/LoopGenerators.cpp (revision 5b263d4ce128ab448d123a9e65a3f70e12565d7c)
1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains functions to create scalar and parallel loops as LLVM-IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/CodeGen/LoopGenerators.h"
15 #include "polly/ScopDetection.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/IR/DataLayout.h"
18 #include "llvm/IR/Dominators.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
22 
23 using namespace llvm;
24 using namespace polly;
25 
26 static cl::opt<int>
27     PollyNumThreads("polly-num-threads",
28                     cl::desc("Number of threads to use (0 = auto)"), cl::Hidden,
29                     cl::init(0));
30 
31 // We generate a loop of either of the following structures:
32 //
33 //              BeforeBB                      BeforeBB
34 //                 |                             |
35 //                 v                             v
36 //              GuardBB                      PreHeaderBB
37 //              /      |                         |   _____
38 //     __  PreHeaderBB  |                        v  \/    |
39 //    /  \    /         |                     HeaderBB  latch
40 // latch  HeaderBB      |                        |\       |
41 //    \  /    \         /                        | \------/
42 //     <       \       /                         |
43 //              \     /                          v
44 //              ExitBB                         ExitBB
45 //
46 // depending on whether or not we know that it is executed at least once. If
47 // not, GuardBB checks if the loop is executed at least once. If this is the
48 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
49 // contains the loop iv 'polly.indvar', the incremented loop iv
50 // 'polly.indvar_next' as well as the condition to check if we execute another
51 // iteration of the loop. After the loop has finished, we branch to ExitBB.
52 // We expect the type of UB, LB, UB+Stride to be large enough for values that
53 // UB may take throughout the execution of the loop, including the computation
54 // of indvar + Stride before the final abort.
55 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
56                          PollyIRBuilder &Builder, LoopInfo &LI,
57                          DominatorTree &DT, BasicBlock *&ExitBB,
58                          ICmpInst::Predicate Predicate,
59                          ScopAnnotator *Annotator, bool Parallel,
60                          bool UseGuard) {
61   Function *F = Builder.GetInsertBlock()->getParent();
62   LLVMContext &Context = F->getContext();
63 
64   assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
65   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
66   assert(LoopIVType && "UB is not integer?");
67 
68   BasicBlock *BeforeBB = Builder.GetInsertBlock();
69   BasicBlock *GuardBB =
70       UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
71   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
72   BasicBlock *PreHeaderBB =
73       BasicBlock::Create(Context, "polly.loop_preheader", F);
74 
75   // Update LoopInfo
76   Loop *OuterLoop = LI.getLoopFor(BeforeBB);
77   Loop *NewLoop = new Loop();
78 
79   if (OuterLoop)
80     OuterLoop->addChildLoop(NewLoop);
81   else
82     LI.addTopLevelLoop(NewLoop);
83 
84   if (OuterLoop) {
85     if (GuardBB)
86       OuterLoop->addBasicBlockToLoop(GuardBB, LI);
87     OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
88   }
89 
90   NewLoop->addBasicBlockToLoop(HeaderBB, LI);
91 
92   // Notify the annotator (if present) that we have a new loop, but only
93   // after the header block is set.
94   if (Annotator)
95     Annotator->pushLoop(NewLoop, Parallel);
96 
97   // ExitBB
98   ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
99   ExitBB->setName("polly.loop_exit");
100 
101   // BeforeBB
102   if (GuardBB) {
103     BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
104     DT.addNewBlock(GuardBB, BeforeBB);
105 
106     // GuardBB
107     Builder.SetInsertPoint(GuardBB);
108     Value *LoopGuard;
109     LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
110     LoopGuard->setName("polly.loop_guard");
111     Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
112     DT.addNewBlock(PreHeaderBB, GuardBB);
113   } else {
114     BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
115     DT.addNewBlock(PreHeaderBB, BeforeBB);
116   }
117 
118   // PreHeaderBB
119   Builder.SetInsertPoint(PreHeaderBB);
120   Builder.CreateBr(HeaderBB);
121 
122   // HeaderBB
123   DT.addNewBlock(HeaderBB, PreHeaderBB);
124   Builder.SetInsertPoint(HeaderBB);
125   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
126   IV->addIncoming(LB, PreHeaderBB);
127   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
128   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
129   Value *LoopCondition =
130       Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
131 
132   // Create the loop latch and annotate it as such.
133   BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
134   if (Annotator)
135     Annotator->annotateLoopLatch(B, NewLoop, Parallel);
136 
137   IV->addIncoming(IncrementedIV, HeaderBB);
138   if (GuardBB)
139     DT.changeImmediateDominator(ExitBB, GuardBB);
140   else
141     DT.changeImmediateDominator(ExitBB, HeaderBB);
142 
143   // The loop body should be added here.
144   Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
145   return IV;
146 }
147 
148 Value *ParallelLoopGenerator::createParallelLoop(
149     Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
150     ValueMapT &Map, BasicBlock::iterator *LoopBody) {
151   Function *SubFn;
152 
153   AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
154   BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
155   Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
156   *LoopBody = Builder.GetInsertPoint();
157   Builder.SetInsertPoint(&*BeforeLoop);
158 
159   Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
160                                             "polly.par.userContext");
161 
162   // Add one as the upper bound provided by openmp is a < comparison
163   // whereas the codegenForSequential function creates a <= comparison.
164   UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
165 
166   // Tell the runtime we start a parallel loop
167   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
168   Builder.CreateCall(SubFn, SubFnParam);
169   createCallJoinThreads();
170 
171   return IV;
172 }
173 
174 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
175                                                    Value *SubFnParam, Value *LB,
176                                                    Value *UB, Value *Stride) {
177   const std::string Name = "GOMP_parallel_loop_runtime_start";
178 
179   Function *F = M->getFunction(Name);
180 
181   // If F is not available, declare it.
182   if (!F) {
183     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
184 
185     Type *Params[] = {PointerType::getUnqual(FunctionType::get(
186                           Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
187                       Builder.getInt8PtrTy(),
188                       Builder.getInt32Ty(),
189                       LongType,
190                       LongType,
191                       LongType};
192 
193     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
194     F = Function::Create(Ty, Linkage, Name, M);
195   }
196 
197   Value *NumberOfThreads = Builder.getInt32(PollyNumThreads);
198   Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};
199 
200   Builder.CreateCall(F, Args);
201 }
202 
203 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
204                                                     Value *UBPtr) {
205   const std::string Name = "GOMP_loop_runtime_next";
206 
207   Function *F = M->getFunction(Name);
208 
209   // If F is not available, declare it.
210   if (!F) {
211     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
212     Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
213     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
214     F = Function::Create(Ty, Linkage, Name, M);
215   }
216 
217   Value *Args[] = {LBPtr, UBPtr};
218   Value *Return = Builder.CreateCall(F, Args);
219   Return = Builder.CreateICmpNE(
220       Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
221   return Return;
222 }
223 
224 void ParallelLoopGenerator::createCallJoinThreads() {
225   const std::string Name = "GOMP_parallel_end";
226 
227   Function *F = M->getFunction(Name);
228 
229   // If F is not available, declare it.
230   if (!F) {
231     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
232 
233     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
234     F = Function::Create(Ty, Linkage, Name, M);
235   }
236 
237   Builder.CreateCall(F, {});
238 }
239 
240 void ParallelLoopGenerator::createCallCleanupThread() {
241   const std::string Name = "GOMP_loop_end_nowait";
242 
243   Function *F = M->getFunction(Name);
244 
245   // If F is not available, declare it.
246   if (!F) {
247     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
248 
249     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
250     F = Function::Create(Ty, Linkage, Name, M);
251   }
252 
253   Builder.CreateCall(F, {});
254 }
255 
256 Function *ParallelLoopGenerator::createSubFnDefinition() {
257   Function *F = Builder.GetInsertBlock()->getParent();
258   std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
259   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
260   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
261                                      F->getName() + "_polly_subfn", M);
262 
263   // Certain backends (e.g., NVPTX) do not support '.'s in function names.
264   // Hence, we ensure that all '.'s are replaced by '_'s.
265   std::string FunctionName = SubFn->getName();
266   std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
267   SubFn->setName(FunctionName);
268 
269   // Do not run any polly pass on the new function.
270   SubFn->addFnAttr(PollySkipFnAttr);
271 
272   Function::arg_iterator AI = SubFn->arg_begin();
273   AI->setName("polly.par.userContext");
274 
275   return SubFn;
276 }
277 
278 AllocaInst *
279 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
280   SmallVector<Type *, 8> Members;
281 
282   for (Value *V : Values)
283     Members.push_back(V->getType());
284 
285   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
286 
287   // We do not want to allocate the alloca inside any loop, thus we allocate it
288   // in the entry block of the function and use annotations to denote the actual
289   // live span (similar to clang).
290   BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
291   Instruction *IP = &*EntryBB.getFirstInsertionPt();
292   StructType *Ty = StructType::get(Builder.getContext(), Members);
293   AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
294                                       "polly.par.userContext", IP);
295 
296   for (unsigned i = 0; i < Values.size(); i++) {
297     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
298     Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
299     Builder.CreateStore(Values[i], Address);
300   }
301 
302   return Struct;
303 }
304 
305 void ParallelLoopGenerator::extractValuesFromStruct(
306     SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
307   for (unsigned i = 0; i < OldValues.size(); i++) {
308     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
309     Value *NewValue = Builder.CreateLoad(Address);
310     NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
311     Map[OldValues[i]] = NewValue;
312   }
313 }
314 
315 Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData,
316                                           SetVector<Value *> Data,
317                                           ValueMapT &Map, Function **SubFnPtr) {
318   BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
319   Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
320   Function *SubFn = createSubFnDefinition();
321   LLVMContext &Context = SubFn->getContext();
322 
323   // Store the previous basic block.
324   PrevBB = Builder.GetInsertBlock();
325 
326   // Create basic blocks.
327   HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
328   ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
329   CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
330   PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
331 
332   DT.addNewBlock(HeaderBB, PrevBB);
333   DT.addNewBlock(ExitBB, HeaderBB);
334   DT.addNewBlock(CheckNextBB, HeaderBB);
335   DT.addNewBlock(PreHeaderBB, HeaderBB);
336 
337   // Fill up basic block HeaderBB.
338   Builder.SetInsertPoint(HeaderBB);
339   LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
340   UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
341   UserContext = Builder.CreateBitCast(
342       &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
343 
344   extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
345                           Map);
346   Builder.CreateBr(CheckNextBB);
347 
348   // Add code to check if another set of iterations will be executed.
349   Builder.SetInsertPoint(CheckNextBB);
350   Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
351   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
352                                         "polly.par.hasNextScheduleBlock");
353   Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
354 
355   // Add code to load the iv bounds for this set of iterations.
356   Builder.SetInsertPoint(PreHeaderBB);
357   LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
358   UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
359 
360   // Subtract one as the upper bound provided by openmp is a < comparison
361   // whereas the codegenForSequential function creates a <= comparison.
362   UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
363                          "polly.par.UBAdjusted");
364 
365   Builder.CreateBr(CheckNextBB);
366   Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
367   IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
368                   nullptr, true, /* UseGuard */ false);
369 
370   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
371 
372   // Add code to terminate this subfunction.
373   Builder.SetInsertPoint(ExitBB);
374   createCallCleanupThread();
375   Builder.CreateRetVoid();
376 
377   Builder.SetInsertPoint(&*LoopBody);
378   *SubFnPtr = SubFn;
379 
380   return IV;
381 }
382