xref: /llvm-project/polly/lib/CodeGen/LoopGenerators.cpp (revision 2946cd701067404b99c39fb29dc9c74bd7193eb3)
1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to create scalar and parallel loops as LLVM-IR.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/CodeGen/LoopGenerators.h"
14 #include "polly/ScopDetection.h"
15 #include "llvm/Analysis/LoopInfo.h"
16 #include "llvm/IR/DataLayout.h"
17 #include "llvm/IR/Dominators.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
21 
22 using namespace llvm;
23 using namespace polly;
24 
25 static cl::opt<int>
26     PollyNumThreads("polly-num-threads",
27                     cl::desc("Number of threads to use (0 = auto)"), cl::Hidden,
28                     cl::init(0));
29 
30 // We generate a loop of either of the following structures:
31 //
32 //              BeforeBB                      BeforeBB
33 //                 |                             |
34 //                 v                             v
35 //              GuardBB                      PreHeaderBB
36 //              /      |                         |   _____
37 //     __  PreHeaderBB  |                        v  \/    |
38 //    /  \    /         |                     HeaderBB  latch
39 // latch  HeaderBB      |                        |\       |
40 //    \  /    \         /                        | \------/
41 //     <       \       /                         |
42 //              \     /                          v
43 //              ExitBB                         ExitBB
44 //
45 // depending on whether or not we know that it is executed at least once. If
46 // not, GuardBB checks if the loop is executed at least once. If this is the
47 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
48 // contains the loop iv 'polly.indvar', the incremented loop iv
49 // 'polly.indvar_next' as well as the condition to check if we execute another
50 // iteration of the loop. After the loop has finished, we branch to ExitBB.
51 // We expect the type of UB, LB, UB+Stride to be large enough for values that
52 // UB may take throughout the execution of the loop, including the computation
53 // of indvar + Stride before the final abort.
54 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
55                          PollyIRBuilder &Builder, LoopInfo &LI,
56                          DominatorTree &DT, BasicBlock *&ExitBB,
57                          ICmpInst::Predicate Predicate,
58                          ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
59                          bool LoopVectDisabled) {
60   Function *F = Builder.GetInsertBlock()->getParent();
61   LLVMContext &Context = F->getContext();
62 
63   assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
64   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
65   assert(LoopIVType && "UB is not integer?");
66 
67   BasicBlock *BeforeBB = Builder.GetInsertBlock();
68   BasicBlock *GuardBB =
69       UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
70   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
71   BasicBlock *PreHeaderBB =
72       BasicBlock::Create(Context, "polly.loop_preheader", F);
73 
74   // Update LoopInfo
75   Loop *OuterLoop = LI.getLoopFor(BeforeBB);
76   Loop *NewLoop = LI.AllocateLoop();
77 
78   if (OuterLoop)
79     OuterLoop->addChildLoop(NewLoop);
80   else
81     LI.addTopLevelLoop(NewLoop);
82 
83   if (OuterLoop) {
84     if (GuardBB)
85       OuterLoop->addBasicBlockToLoop(GuardBB, LI);
86     OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
87   }
88 
89   NewLoop->addBasicBlockToLoop(HeaderBB, LI);
90 
91   // Notify the annotator (if present) that we have a new loop, but only
92   // after the header block is set.
93   if (Annotator)
94     Annotator->pushLoop(NewLoop, Parallel);
95 
96   // ExitBB
97   ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
98   ExitBB->setName("polly.loop_exit");
99 
100   // BeforeBB
101   if (GuardBB) {
102     BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
103     DT.addNewBlock(GuardBB, BeforeBB);
104 
105     // GuardBB
106     Builder.SetInsertPoint(GuardBB);
107     Value *LoopGuard;
108     LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
109     LoopGuard->setName("polly.loop_guard");
110     Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
111     DT.addNewBlock(PreHeaderBB, GuardBB);
112   } else {
113     BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
114     DT.addNewBlock(PreHeaderBB, BeforeBB);
115   }
116 
117   // PreHeaderBB
118   Builder.SetInsertPoint(PreHeaderBB);
119   Builder.CreateBr(HeaderBB);
120 
121   // HeaderBB
122   DT.addNewBlock(HeaderBB, PreHeaderBB);
123   Builder.SetInsertPoint(HeaderBB);
124   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
125   IV->addIncoming(LB, PreHeaderBB);
126   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
127   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
128   Value *LoopCondition =
129       Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
130 
131   // Create the loop latch and annotate it as such.
132   BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
133   if (Annotator)
134     Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
135 
136   IV->addIncoming(IncrementedIV, HeaderBB);
137   if (GuardBB)
138     DT.changeImmediateDominator(ExitBB, GuardBB);
139   else
140     DT.changeImmediateDominator(ExitBB, HeaderBB);
141 
142   // The loop body should be added here.
143   Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
144   return IV;
145 }
146 
147 Value *ParallelLoopGenerator::createParallelLoop(
148     Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
149     ValueMapT &Map, BasicBlock::iterator *LoopBody) {
150   Function *SubFn;
151 
152   AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
153   BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
154   Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
155   *LoopBody = Builder.GetInsertPoint();
156   Builder.SetInsertPoint(&*BeforeLoop);
157 
158   Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
159                                             "polly.par.userContext");
160 
161   // Add one as the upper bound provided by OpenMP is a < comparison
162   // whereas the codegenForSequential function creates a <= comparison.
163   UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
164 
165   // Tell the runtime we start a parallel loop
166   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
167   Builder.CreateCall(SubFn, SubFnParam);
168   createCallJoinThreads();
169 
170   return IV;
171 }
172 
173 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
174                                                    Value *SubFnParam, Value *LB,
175                                                    Value *UB, Value *Stride) {
176   const std::string Name = "GOMP_parallel_loop_runtime_start";
177 
178   Function *F = M->getFunction(Name);
179 
180   // If F is not available, declare it.
181   if (!F) {
182     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
183 
184     Type *Params[] = {PointerType::getUnqual(FunctionType::get(
185                           Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
186                       Builder.getInt8PtrTy(),
187                       Builder.getInt32Ty(),
188                       LongType,
189                       LongType,
190                       LongType};
191 
192     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
193     F = Function::Create(Ty, Linkage, Name, M);
194   }
195 
196   Value *NumberOfThreads = Builder.getInt32(PollyNumThreads);
197   Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};
198 
199   Builder.CreateCall(F, Args);
200 }
201 
202 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
203                                                     Value *UBPtr) {
204   const std::string Name = "GOMP_loop_runtime_next";
205 
206   Function *F = M->getFunction(Name);
207 
208   // If F is not available, declare it.
209   if (!F) {
210     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
211     Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
212     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
213     F = Function::Create(Ty, Linkage, Name, M);
214   }
215 
216   Value *Args[] = {LBPtr, UBPtr};
217   Value *Return = Builder.CreateCall(F, Args);
218   Return = Builder.CreateICmpNE(
219       Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
220   return Return;
221 }
222 
223 void ParallelLoopGenerator::createCallJoinThreads() {
224   const std::string Name = "GOMP_parallel_end";
225 
226   Function *F = M->getFunction(Name);
227 
228   // If F is not available, declare it.
229   if (!F) {
230     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
231 
232     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
233     F = Function::Create(Ty, Linkage, Name, M);
234   }
235 
236   Builder.CreateCall(F, {});
237 }
238 
239 void ParallelLoopGenerator::createCallCleanupThread() {
240   const std::string Name = "GOMP_loop_end_nowait";
241 
242   Function *F = M->getFunction(Name);
243 
244   // If F is not available, declare it.
245   if (!F) {
246     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
247 
248     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
249     F = Function::Create(Ty, Linkage, Name, M);
250   }
251 
252   Builder.CreateCall(F, {});
253 }
254 
255 Function *ParallelLoopGenerator::createSubFnDefinition() {
256   Function *F = Builder.GetInsertBlock()->getParent();
257   std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
258   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
259   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
260                                      F->getName() + "_polly_subfn", M);
261 
262   // Certain backends (e.g., NVPTX) do not support '.'s in function names.
263   // Hence, we ensure that all '.'s are replaced by '_'s.
264   std::string FunctionName = SubFn->getName();
265   std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
266   SubFn->setName(FunctionName);
267 
268   // Do not run any polly pass on the new function.
269   SubFn->addFnAttr(PollySkipFnAttr);
270 
271   Function::arg_iterator AI = SubFn->arg_begin();
272   AI->setName("polly.par.userContext");
273 
274   return SubFn;
275 }
276 
277 AllocaInst *
278 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
279   SmallVector<Type *, 8> Members;
280 
281   for (Value *V : Values)
282     Members.push_back(V->getType());
283 
284   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
285 
286   // We do not want to allocate the alloca inside any loop, thus we allocate it
287   // in the entry block of the function and use annotations to denote the actual
288   // live span (similar to clang).
289   BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
290   Instruction *IP = &*EntryBB.getFirstInsertionPt();
291   StructType *Ty = StructType::get(Builder.getContext(), Members);
292   AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
293                                       "polly.par.userContext", IP);
294 
295   for (unsigned i = 0; i < Values.size(); i++) {
296     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
297     Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
298     Builder.CreateStore(Values[i], Address);
299   }
300 
301   return Struct;
302 }
303 
304 void ParallelLoopGenerator::extractValuesFromStruct(
305     SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
306   for (unsigned i = 0; i < OldValues.size(); i++) {
307     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
308     Value *NewValue = Builder.CreateLoad(Address);
309     NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
310     Map[OldValues[i]] = NewValue;
311   }
312 }
313 
314 Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData,
315                                           SetVector<Value *> Data,
316                                           ValueMapT &Map, Function **SubFnPtr) {
317   BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
318   Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
319   Function *SubFn = createSubFnDefinition();
320   LLVMContext &Context = SubFn->getContext();
321 
322   // Store the previous basic block.
323   PrevBB = Builder.GetInsertBlock();
324 
325   // Create basic blocks.
326   HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
327   ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
328   CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
329   PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
330 
331   DT.addNewBlock(HeaderBB, PrevBB);
332   DT.addNewBlock(ExitBB, HeaderBB);
333   DT.addNewBlock(CheckNextBB, HeaderBB);
334   DT.addNewBlock(PreHeaderBB, HeaderBB);
335 
336   // Fill up basic block HeaderBB.
337   Builder.SetInsertPoint(HeaderBB);
338   LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
339   UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
340   UserContext = Builder.CreateBitCast(
341       &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
342 
343   extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
344                           Map);
345   Builder.CreateBr(CheckNextBB);
346 
347   // Add code to check if another set of iterations will be executed.
348   Builder.SetInsertPoint(CheckNextBB);
349   Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
350   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
351                                         "polly.par.hasNextScheduleBlock");
352   Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
353 
354   // Add code to load the iv bounds for this set of iterations.
355   Builder.SetInsertPoint(PreHeaderBB);
356   LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
357   UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
358 
359   // Subtract one as the upper bound provided by OpenMP is a < comparison
360   // whereas the codegenForSequential function creates a <= comparison.
361   UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
362                          "polly.par.UBAdjusted");
363 
364   Builder.CreateBr(CheckNextBB);
365   Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
366   IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
367                   nullptr, true, /* UseGuard */ false);
368 
369   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
370 
371   // Add code to terminate this subfunction.
372   Builder.SetInsertPoint(ExitBB);
373   createCallCleanupThread();
374   Builder.CreateRetVoid();
375 
376   Builder.SetInsertPoint(&*LoopBody);
377   *SubFnPtr = SubFn;
378 
379   return IV;
380 }
381